PK training_args/data.pklFBZZZZZZZZ€cllamafactory.hparams.training_args TrainingArguments q)q}q(X output_dirqX'/root/autodl-tmp/output/Qwen2.5-7B-loraqXoverwrite_output_dirqˆXdo_trainqˆXdo_evalqˆX do_predictq‰X eval_strategyq ctransformers.trainer_utils IntervalStrategy q Xstepsq …q Rq Xprediction_loss_onlyq‰Xper_device_train_batch_sizeqKXper_device_eval_batch_sizeqKXper_gpu_train_batch_sizeqNXper_gpu_eval_batch_sizeqNXgradient_accumulation_stepsqKXeval_accumulation_stepsqNX eval_delayqKXtorch_empty_cache_stepsqNX learning_rateqG? 6âëC-X weight_decayqGX adam_beta1qG?ìÌÌÌÌÌÍX adam_beta2qG?ï÷ÎÙ‡+X adam_epsilonqG>EyŽâ0Œ:X max_grad_normqG?ðXnum_train_epochsqG@X max_stepsqJÿÿÿÿXlr_scheduler_typeqctransformers.trainer_utils SchedulerType q Xcosineq!…q"Rq#Xlr_scheduler_kwargsq$}q%X warmup_ratioq&GX warmup_stepsq'KX log_levelq(Xpassiveq)Xlog_level_replicaq*Xwarningq+Xlog_on_each_nodeq,ˆX logging_dirq-X`/root/autodl-tmp/output/Qwen2.5-7B-lora/runs/Feb21_19-49-21_autodl-container-f1cc40b633-190867f3q.Xlogging_strategyq/h Xlogging_first_stepq0‰X logging_stepsq1K2Xlogging_nan_inf_filterq2ˆX save_strategyq3ctransformers.trainer_utils SaveStrategy q4h …q5Rq6X save_stepsq7KdXsave_total_limitq8NXsave_safetensorsq9ˆXsave_on_each_nodeq:‰Xsave_only_modelq;‰X'restore_callback_states_from_checkpointq<‰Xno_cudaq=‰Xuse_cpuq>‰Xuse_mps_deviceq?‰Xseedq@K*X data_seedqANX jit_mode_evalqB‰Xuse_ipexqC‰Xbf16qD‰Xfp16qEˆXfp16_opt_levelqFXO1qGXhalf_precision_backendqHXautoqIXbf16_full_evalqJ‰Xfp16_full_evalqK‰Xtf32qLNX local_rankqMKX ddp_backendqNNX tpu_num_coresqONXtpu_metrics_debugqP‰XdebugqQ]qRXdataloader_drop_lastqS‰X eval_stepsqTK2Xdataloader_num_workersqUKXdataloader_prefetch_factorqVNX past_indexqWJÿÿÿÿXrun_nameqXhX disable_tqdmqY‰Xremove_unused_columnsqZ‰X label_namesq[NXload_best_model_at_endq\ˆXmetric_for_best_modelq]Xlossq^Xgreater_is_betterq_‰Xignore_data_skipq`‰Xfsdpqa]qbXfsdp_min_num_paramsqcKX fsdp_configqd}qe(Xmin_num_paramsqfKXxlaqg‰X xla_fsdp_v2qh‰Xxla_fsdp_grad_ckptqi‰uX"fsdp_transformer_layer_cls_to_wrapqjNXaccelerator_configqkctransformers.trainer_pt_utils AcceleratorConfig ql)qm}qn(X split_batchesqo‰Xdispatch_batchesqpNX even_batchesqqˆXuse_seedable_samplerqrˆX non_blockingqs‰Xgradient_accumulation_kwargsqtNubX deepspeedquNXlabel_smoothing_factorqvGXoptimqwctransformers.training_args OptimizerNames qxX adamw_torchqy…qzRq{X optim_argsq|NX adafactorq}‰Xgroup_by_lengthq~‰Xlength_column_nameqXlengthq€X report_toq]q‚X tensorboardqƒaXddp_find_unused_parametersq„NXddp_bucket_cap_mbq…NXddp_broadcast_buffersq†NXdataloader_pin_memoryq‡ˆXdataloader_persistent_workersqˆ‰Xskip_memory_metricsq‰ˆXuse_legacy_prediction_loopqЉX push_to_hubq‹‰Xresume_from_checkpointqŒNX hub_model_idqNX hub_strategyqŽctransformers.trainer_utils HubStrategy qX every_saveq…q‘Rq’X hub_tokenq“NXhub_private_repoq”NXhub_always_pushq•‰Xgradient_checkpointingq–‰Xgradient_checkpointing_kwargsq—NXinclude_inputs_for_metricsq˜‰Xinclude_for_metricsq™]qšXeval_do_concat_batchesq›ˆX fp16_backendqœhIXevaluation_strategyqh Xpush_to_hub_model_idqžNXpush_to_hub_organizationqŸNXpush_to_hub_tokenq NX mp_parametersq¡Xq¢Xauto_find_batch_sizeq£‰Xfull_determinismq¤‰X torchdynamoq¥NX ray_scopeq¦Xlastq§X ddp_timeoutq¨MX torch_compileq©‰Xtorch_compile_backendqªNXtorch_compile_modeq«NhpNhoNXinclude_tokens_per_secondq¬‰Xinclude_num_input_tokens_seenq­‰Xneftune_noise_alphaq®NXoptim_target_modulesq¯NXbatch_eval_metricsq°‰X eval_on_startq±‰Xuse_liger_kernelq²‰Xeval_use_gather_objectq³‰Xaverage_tokens_across_devicesq´‰Xsortish_samplerqµ‰Xpredict_with_generateq¶‰Xgeneration_max_lengthq·MXgeneration_num_beamsq¸NXgeneration_configq¹NX ray_run_nameqºNXray_num_workersq»KXresources_per_workerq¼}q½XGPUq¾KsXplacement_strategyq¿XPACKqÀXdistributed_stateqÁcaccelerate.state PartialState qÂ)qÃ}qÄ(X_cpuqʼnXbackendqÆNXdeviceqÇctorch device qÈXcudaqÉ…qÊRqËhQ‰Xdistributed_typeqÌcaccelerate.utils.dataclasses DistributedType qÍXNOqÎ…qÏRqÐX num_processesqÑKX process_indexqÒKXlocal_process_indexqÓKX fork_launchedqÔ‰ubX_n_gpuqÕKX__cached__setup_devicesqÖhÈXcudaq×K†qØRqÙXdeepspeed_pluginqÚNXuse_rayqÛ‰ub.PKS¡‹dÅÅPK6training_args/byteorderFB2ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZlittlePK…=ãPK7training_args/versionFB3ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ3 PKÑžgUPK$,training_args/.data/serialization_idFB(ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ0636457737946401051300000018121970198592PKuduà((PKS¡‹dÅÅtraining_args/data.pklPK…=ãtraining_args/byteorderPKÑžgU–training_args/versionPKuduà(($training_args/.data/serialization_idPK,-¸PKÖPK¸