{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9983052215317567, "eval_steps": 500, "global_step": 4644, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4285714285714287e-07, "loss": 1.5165, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.8571428571428575e-07, "loss": 1.5417, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.285714285714286e-07, "loss": 1.5569, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.714285714285715e-07, "loss": 1.5622, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.142857142857143e-07, "loss": 1.547, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.571428571428572e-07, "loss": 1.5333, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.513, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.142857142857143e-06, "loss": 1.5043, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.2857142857142856e-06, "loss": 1.4858, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.4285714285714286e-06, "loss": 1.4728, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.5714285714285714e-06, "loss": 1.4254, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.7142857142857145e-06, "loss": 1.3864, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.8571428571428573e-06, "loss": 1.2765, "step": 13 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 1.2683, "step": 14 }, { "epoch": 0.01, "learning_rate": 2.1428571428571427e-06, "loss": 1.2652, "step": 15 }, { "epoch": 0.01, "learning_rate": 2.285714285714286e-06, "loss": 1.204, "step": 16 }, { "epoch": 0.01, "learning_rate": 2.428571428571429e-06, "loss": 1.0444, "step": 17 }, { "epoch": 0.01, "learning_rate": 2.571428571428571e-06, "loss": 0.9532, "step": 18 }, { "epoch": 0.01, "learning_rate": 2.7142857142857144e-06, "loss": 0.9477, "step": 19 }, { "epoch": 0.01, "learning_rate": 2.8571428571428573e-06, "loss": 0.9128, "step": 20 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 0.8812, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.142857142857143e-06, "loss": 0.8899, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.285714285714286e-06, "loss": 0.8595, "step": 23 }, { "epoch": 0.02, "learning_rate": 3.428571428571429e-06, "loss": 0.79, "step": 24 }, { "epoch": 0.02, "learning_rate": 3.5714285714285718e-06, "loss": 0.7468, "step": 25 }, { "epoch": 0.02, "learning_rate": 3.7142857142857146e-06, "loss": 0.746, "step": 26 }, { "epoch": 0.02, "learning_rate": 3.857142857142858e-06, "loss": 0.7081, "step": 27 }, { "epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.7229, "step": 28 }, { "epoch": 0.02, "learning_rate": 4.1428571428571435e-06, "loss": 0.7201, "step": 29 }, { "epoch": 0.02, "learning_rate": 4.2857142857142855e-06, "loss": 0.6939, "step": 30 }, { "epoch": 0.02, "learning_rate": 4.428571428571429e-06, "loss": 0.6923, "step": 31 }, { "epoch": 0.02, "learning_rate": 4.571428571428572e-06, "loss": 0.6934, "step": 32 }, { "epoch": 0.02, "learning_rate": 4.714285714285715e-06, "loss": 0.719, "step": 33 }, { "epoch": 0.02, "learning_rate": 4.857142857142858e-06, "loss": 0.6944, "step": 34 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 0.6601, "step": 35 }, { "epoch": 0.02, "learning_rate": 5.142857142857142e-06, "loss": 0.6701, "step": 36 }, { "epoch": 0.02, "learning_rate": 5.285714285714286e-06, "loss": 0.6554, "step": 37 }, { "epoch": 0.02, "learning_rate": 5.428571428571429e-06, "loss": 0.6637, "step": 38 }, { "epoch": 0.03, "learning_rate": 5.571428571428572e-06, "loss": 0.6261, "step": 39 }, { "epoch": 0.03, "learning_rate": 5.7142857142857145e-06, "loss": 0.6468, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.857142857142858e-06, "loss": 0.6304, "step": 41 }, { "epoch": 0.03, "learning_rate": 6e-06, "loss": 0.6452, "step": 42 }, { "epoch": 0.03, "learning_rate": 6.142857142857144e-06, "loss": 0.6445, "step": 43 }, { "epoch": 0.03, "learning_rate": 6.285714285714286e-06, "loss": 0.6151, "step": 44 }, { "epoch": 0.03, "learning_rate": 6.4285714285714295e-06, "loss": 0.6259, "step": 45 }, { "epoch": 0.03, "learning_rate": 6.571428571428572e-06, "loss": 0.6333, "step": 46 }, { "epoch": 0.03, "learning_rate": 6.714285714285714e-06, "loss": 0.6171, "step": 47 }, { "epoch": 0.03, "learning_rate": 6.857142857142858e-06, "loss": 0.6167, "step": 48 }, { "epoch": 0.03, "learning_rate": 7e-06, "loss": 0.6133, "step": 49 }, { "epoch": 0.03, "learning_rate": 7.1428571428571436e-06, "loss": 0.6136, "step": 50 }, { "epoch": 0.03, "learning_rate": 7.285714285714286e-06, "loss": 0.6283, "step": 51 }, { "epoch": 0.03, "learning_rate": 7.428571428571429e-06, "loss": 0.6231, "step": 52 }, { "epoch": 0.03, "learning_rate": 7.571428571428572e-06, "loss": 0.6077, "step": 53 }, { "epoch": 0.03, "learning_rate": 7.714285714285716e-06, "loss": 0.6312, "step": 54 }, { "epoch": 0.04, "learning_rate": 7.857142857142858e-06, "loss": 0.5998, "step": 55 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 0.623, "step": 56 }, { "epoch": 0.04, "learning_rate": 8.142857142857143e-06, "loss": 0.5723, "step": 57 }, { "epoch": 0.04, "learning_rate": 8.285714285714287e-06, "loss": 0.5794, "step": 58 }, { "epoch": 0.04, "learning_rate": 8.428571428571429e-06, "loss": 0.621, "step": 59 }, { "epoch": 0.04, "learning_rate": 8.571428571428571e-06, "loss": 0.6107, "step": 60 }, { "epoch": 0.04, "learning_rate": 8.714285714285715e-06, "loss": 0.5981, "step": 61 }, { "epoch": 0.04, "learning_rate": 8.857142857142858e-06, "loss": 0.607, "step": 62 }, { "epoch": 0.04, "learning_rate": 9e-06, "loss": 0.5786, "step": 63 }, { "epoch": 0.04, "learning_rate": 9.142857142857144e-06, "loss": 0.5694, "step": 64 }, { "epoch": 0.04, "learning_rate": 9.285714285714288e-06, "loss": 0.592, "step": 65 }, { "epoch": 0.04, "learning_rate": 9.42857142857143e-06, "loss": 0.6035, "step": 66 }, { "epoch": 0.04, "learning_rate": 9.571428571428573e-06, "loss": 0.5961, "step": 67 }, { "epoch": 0.04, "learning_rate": 9.714285714285715e-06, "loss": 0.5898, "step": 68 }, { "epoch": 0.04, "learning_rate": 9.857142857142859e-06, "loss": 0.5731, "step": 69 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 0.6077, "step": 70 }, { "epoch": 0.05, "learning_rate": 1.0142857142857143e-05, "loss": 0.5916, "step": 71 }, { "epoch": 0.05, "learning_rate": 1.0285714285714285e-05, "loss": 0.5855, "step": 72 }, { "epoch": 0.05, "learning_rate": 1.042857142857143e-05, "loss": 0.5863, "step": 73 }, { "epoch": 0.05, "learning_rate": 1.0571428571428572e-05, "loss": 0.6148, "step": 74 }, { "epoch": 0.05, "learning_rate": 1.0714285714285714e-05, "loss": 0.5721, "step": 75 }, { "epoch": 0.05, "learning_rate": 1.0857142857142858e-05, "loss": 0.5798, "step": 76 }, { "epoch": 0.05, "learning_rate": 1.1000000000000001e-05, "loss": 0.6096, "step": 77 }, { "epoch": 0.05, "learning_rate": 1.1142857142857143e-05, "loss": 0.566, "step": 78 }, { "epoch": 0.05, "learning_rate": 1.1285714285714287e-05, "loss": 0.5704, "step": 79 }, { "epoch": 0.05, "learning_rate": 1.1428571428571429e-05, "loss": 0.5707, "step": 80 }, { "epoch": 0.05, "learning_rate": 1.1571428571428573e-05, "loss": 0.5707, "step": 81 }, { "epoch": 0.05, "learning_rate": 1.1714285714285716e-05, "loss": 0.5624, "step": 82 }, { "epoch": 0.05, "learning_rate": 1.1857142857142858e-05, "loss": 0.5578, "step": 83 }, { "epoch": 0.05, "learning_rate": 1.2e-05, "loss": 0.5642, "step": 84 }, { "epoch": 0.05, "learning_rate": 1.2142857142857142e-05, "loss": 0.5774, "step": 85 }, { "epoch": 0.06, "learning_rate": 1.2285714285714288e-05, "loss": 0.5866, "step": 86 }, { "epoch": 0.06, "learning_rate": 1.242857142857143e-05, "loss": 0.5866, "step": 87 }, { "epoch": 0.06, "learning_rate": 1.2571428571428572e-05, "loss": 0.5841, "step": 88 }, { "epoch": 0.06, "learning_rate": 1.2714285714285715e-05, "loss": 0.5704, "step": 89 }, { "epoch": 0.06, "learning_rate": 1.2857142857142859e-05, "loss": 0.5707, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.3000000000000001e-05, "loss": 0.5865, "step": 91 }, { "epoch": 0.06, "learning_rate": 1.3142857142857145e-05, "loss": 0.5519, "step": 92 }, { "epoch": 0.06, "learning_rate": 1.3285714285714287e-05, "loss": 0.5732, "step": 93 }, { "epoch": 0.06, "learning_rate": 1.3428571428571429e-05, "loss": 0.5998, "step": 94 }, { "epoch": 0.06, "learning_rate": 1.3571428571428574e-05, "loss": 0.5698, "step": 95 }, { "epoch": 0.06, "learning_rate": 1.3714285714285716e-05, "loss": 0.553, "step": 96 }, { "epoch": 0.06, "learning_rate": 1.3857142857142858e-05, "loss": 0.5586, "step": 97 }, { "epoch": 0.06, "learning_rate": 1.4e-05, "loss": 0.5768, "step": 98 }, { "epoch": 0.06, "learning_rate": 1.4142857142857145e-05, "loss": 0.5627, "step": 99 }, { "epoch": 0.06, "learning_rate": 1.4285714285714287e-05, "loss": 0.5818, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.4428571428571429e-05, "loss": 0.5732, "step": 101 }, { "epoch": 0.07, "learning_rate": 1.4571428571428573e-05, "loss": 0.5758, "step": 102 }, { "epoch": 0.07, "learning_rate": 1.4714285714285716e-05, "loss": 0.5692, "step": 103 }, { "epoch": 0.07, "learning_rate": 1.4857142857142858e-05, "loss": 0.5738, "step": 104 }, { "epoch": 0.07, "learning_rate": 1.5000000000000002e-05, "loss": 0.5573, "step": 105 }, { "epoch": 0.07, "learning_rate": 1.5142857142857144e-05, "loss": 0.5287, "step": 106 }, { "epoch": 0.07, "learning_rate": 1.5285714285714286e-05, "loss": 0.5888, "step": 107 }, { "epoch": 0.07, "learning_rate": 1.542857142857143e-05, "loss": 0.5888, "step": 108 }, { "epoch": 0.07, "learning_rate": 1.5571428571428573e-05, "loss": 0.5507, "step": 109 }, { "epoch": 0.07, "learning_rate": 1.5714285714285715e-05, "loss": 0.5513, "step": 110 }, { "epoch": 0.07, "learning_rate": 1.5857142857142857e-05, "loss": 0.5749, "step": 111 }, { "epoch": 0.07, "learning_rate": 1.6000000000000003e-05, "loss": 0.598, "step": 112 }, { "epoch": 0.07, "learning_rate": 1.6142857142857145e-05, "loss": 0.5246, "step": 113 }, { "epoch": 0.07, "learning_rate": 1.6285714285714287e-05, "loss": 0.5636, "step": 114 }, { "epoch": 0.07, "learning_rate": 1.642857142857143e-05, "loss": 0.5435, "step": 115 }, { "epoch": 0.07, "learning_rate": 1.6571428571428574e-05, "loss": 0.5865, "step": 116 }, { "epoch": 0.08, "learning_rate": 1.6714285714285716e-05, "loss": 0.5478, "step": 117 }, { "epoch": 0.08, "learning_rate": 1.6857142857142858e-05, "loss": 0.5639, "step": 118 }, { "epoch": 0.08, "learning_rate": 1.7e-05, "loss": 0.5615, "step": 119 }, { "epoch": 0.08, "learning_rate": 1.7142857142857142e-05, "loss": 0.558, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.7285714285714287e-05, "loss": 0.5402, "step": 121 }, { "epoch": 0.08, "learning_rate": 1.742857142857143e-05, "loss": 0.5589, "step": 122 }, { "epoch": 0.08, "learning_rate": 1.757142857142857e-05, "loss": 0.5893, "step": 123 }, { "epoch": 0.08, "learning_rate": 1.7714285714285717e-05, "loss": 0.558, "step": 124 }, { "epoch": 0.08, "learning_rate": 1.785714285714286e-05, "loss": 0.5624, "step": 125 }, { "epoch": 0.08, "learning_rate": 1.8e-05, "loss": 0.5643, "step": 126 }, { "epoch": 0.08, "learning_rate": 1.8142857142857146e-05, "loss": 0.5545, "step": 127 }, { "epoch": 0.08, "learning_rate": 1.8285714285714288e-05, "loss": 0.5571, "step": 128 }, { "epoch": 0.08, "learning_rate": 1.842857142857143e-05, "loss": 0.5666, "step": 129 }, { "epoch": 0.08, "learning_rate": 1.8571428571428575e-05, "loss": 0.5527, "step": 130 }, { "epoch": 0.08, "learning_rate": 1.8714285714285717e-05, "loss": 0.5663, "step": 131 }, { "epoch": 0.09, "learning_rate": 1.885714285714286e-05, "loss": 0.5734, "step": 132 }, { "epoch": 0.09, "learning_rate": 1.9e-05, "loss": 0.5537, "step": 133 }, { "epoch": 0.09, "learning_rate": 1.9142857142857146e-05, "loss": 0.5559, "step": 134 }, { "epoch": 0.09, "learning_rate": 1.928571428571429e-05, "loss": 0.582, "step": 135 }, { "epoch": 0.09, "learning_rate": 1.942857142857143e-05, "loss": 0.5588, "step": 136 }, { "epoch": 0.09, "learning_rate": 1.9571428571428572e-05, "loss": 0.5578, "step": 137 }, { "epoch": 0.09, "learning_rate": 1.9714285714285718e-05, "loss": 0.5592, "step": 138 }, { "epoch": 0.09, "learning_rate": 1.985714285714286e-05, "loss": 0.5438, "step": 139 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.5777, "step": 140 }, { "epoch": 0.09, "learning_rate": 1.9999997567387307e-05, "loss": 0.5325, "step": 141 }, { "epoch": 0.09, "learning_rate": 1.9999990269550415e-05, "loss": 0.5643, "step": 142 }, { "epoch": 0.09, "learning_rate": 1.999997810649287e-05, "loss": 0.5806, "step": 143 }, { "epoch": 0.09, "learning_rate": 1.9999961078220587e-05, "loss": 0.5612, "step": 144 }, { "epoch": 0.09, "learning_rate": 1.999993918474186e-05, "loss": 0.5644, "step": 145 }, { "epoch": 0.09, "learning_rate": 1.9999912426067335e-05, "loss": 0.5737, "step": 146 }, { "epoch": 0.09, "learning_rate": 1.999988080221003e-05, "loss": 0.5334, "step": 147 }, { "epoch": 0.1, "learning_rate": 1.9999844313185335e-05, "loss": 0.541, "step": 148 }, { "epoch": 0.1, "learning_rate": 1.9999802959010998e-05, "loss": 0.5348, "step": 149 }, { "epoch": 0.1, "learning_rate": 1.999975673970714e-05, "loss": 0.5642, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.999970565529625e-05, "loss": 0.5419, "step": 151 }, { "epoch": 0.1, "learning_rate": 1.9999649705803178e-05, "loss": 0.5634, "step": 152 }, { "epoch": 0.1, "learning_rate": 1.999958889125515e-05, "loss": 0.5292, "step": 153 }, { "epoch": 0.1, "learning_rate": 1.9999523211681746e-05, "loss": 0.5439, "step": 154 }, { "epoch": 0.1, "learning_rate": 1.999945266711493e-05, "loss": 0.5653, "step": 155 }, { "epoch": 0.1, "learning_rate": 1.9999377257589012e-05, "loss": 0.5286, "step": 156 }, { "epoch": 0.1, "learning_rate": 1.9999296983140692e-05, "loss": 0.536, "step": 157 }, { "epoch": 0.1, "learning_rate": 1.9999211843809018e-05, "loss": 0.5591, "step": 158 }, { "epoch": 0.1, "learning_rate": 1.9999121839635416e-05, "loss": 0.551, "step": 159 }, { "epoch": 0.1, "learning_rate": 1.999902697066367e-05, "loss": 0.5677, "step": 160 }, { "epoch": 0.1, "learning_rate": 1.9998927236939943e-05, "loss": 0.5327, "step": 161 }, { "epoch": 0.1, "learning_rate": 1.9998822638512757e-05, "loss": 0.5346, "step": 162 }, { "epoch": 0.11, "learning_rate": 1.9998713175432993e-05, "loss": 0.548, "step": 163 }, { "epoch": 0.11, "learning_rate": 1.9998598847753918e-05, "loss": 0.5638, "step": 164 }, { "epoch": 0.11, "learning_rate": 1.9998479655531145e-05, "loss": 0.5493, "step": 165 }, { "epoch": 0.11, "learning_rate": 1.999835559882267e-05, "loss": 0.5772, "step": 166 }, { "epoch": 0.11, "learning_rate": 1.9998226677688847e-05, "loss": 0.5593, "step": 167 }, { "epoch": 0.11, "learning_rate": 1.9998092892192403e-05, "loss": 0.5432, "step": 168 }, { "epoch": 0.11, "learning_rate": 1.9997954242398422e-05, "loss": 0.573, "step": 169 }, { "epoch": 0.11, "learning_rate": 1.9997810728374362e-05, "loss": 0.5282, "step": 170 }, { "epoch": 0.11, "learning_rate": 1.999766235019005e-05, "loss": 0.5535, "step": 171 }, { "epoch": 0.11, "learning_rate": 1.999750910791767e-05, "loss": 0.5368, "step": 172 }, { "epoch": 0.11, "learning_rate": 1.999735100163178e-05, "loss": 0.5311, "step": 173 }, { "epoch": 0.11, "learning_rate": 1.9997188031409302e-05, "loss": 0.5392, "step": 174 }, { "epoch": 0.11, "learning_rate": 1.9997020197329525e-05, "loss": 0.528, "step": 175 }, { "epoch": 0.11, "learning_rate": 1.9996847499474102e-05, "loss": 0.5416, "step": 176 }, { "epoch": 0.11, "learning_rate": 1.999666993792706e-05, "loss": 0.5657, "step": 177 }, { "epoch": 0.11, "learning_rate": 1.999648751277478e-05, "loss": 0.5483, "step": 178 }, { "epoch": 0.12, "learning_rate": 1.9996300224106023e-05, "loss": 0.5429, "step": 179 }, { "epoch": 0.12, "learning_rate": 1.99961080720119e-05, "loss": 0.5482, "step": 180 }, { "epoch": 0.12, "learning_rate": 1.9995911056585907e-05, "loss": 0.5322, "step": 181 }, { "epoch": 0.12, "learning_rate": 1.999570917792389e-05, "loss": 0.5418, "step": 182 }, { "epoch": 0.12, "learning_rate": 1.999550243612407e-05, "loss": 0.5531, "step": 183 }, { "epoch": 0.12, "learning_rate": 1.9995290831287032e-05, "loss": 0.5559, "step": 184 }, { "epoch": 0.12, "learning_rate": 1.999507436351572e-05, "loss": 0.5436, "step": 185 }, { "epoch": 0.12, "learning_rate": 1.999485303291546e-05, "loss": 0.5579, "step": 186 }, { "epoch": 0.12, "learning_rate": 1.999462683959393e-05, "loss": 0.5515, "step": 187 }, { "epoch": 0.12, "learning_rate": 1.9994395783661177e-05, "loss": 0.5471, "step": 188 }, { "epoch": 0.12, "learning_rate": 1.9994159865229617e-05, "loss": 0.5326, "step": 189 }, { "epoch": 0.12, "learning_rate": 1.999391908441403e-05, "loss": 0.5458, "step": 190 }, { "epoch": 0.12, "learning_rate": 1.999367344133156e-05, "loss": 0.5343, "step": 191 }, { "epoch": 0.12, "learning_rate": 1.9993422936101715e-05, "loss": 0.5584, "step": 192 }, { "epoch": 0.12, "learning_rate": 1.9993167568846375e-05, "loss": 0.564, "step": 193 }, { "epoch": 0.13, "learning_rate": 1.9992907339689786e-05, "loss": 0.5542, "step": 194 }, { "epoch": 0.13, "learning_rate": 1.999264224875855e-05, "loss": 0.5451, "step": 195 }, { "epoch": 0.13, "learning_rate": 1.9992372296181637e-05, "loss": 0.5544, "step": 196 }, { "epoch": 0.13, "learning_rate": 1.999209748209039e-05, "loss": 0.5338, "step": 197 }, { "epoch": 0.13, "learning_rate": 1.9991817806618512e-05, "loss": 0.5715, "step": 198 }, { "epoch": 0.13, "learning_rate": 1.9991533269902068e-05, "loss": 0.5207, "step": 199 }, { "epoch": 0.13, "learning_rate": 1.9991243872079495e-05, "loss": 0.5569, "step": 200 }, { "epoch": 0.13, "learning_rate": 1.999094961329159e-05, "loss": 0.57, "step": 201 }, { "epoch": 0.13, "learning_rate": 1.9990650493681517e-05, "loss": 0.5621, "step": 202 }, { "epoch": 0.13, "learning_rate": 1.9990346513394807e-05, "loss": 0.5594, "step": 203 }, { "epoch": 0.13, "learning_rate": 1.9990037672579347e-05, "loss": 0.5548, "step": 204 }, { "epoch": 0.13, "learning_rate": 1.99897239713854e-05, "loss": 0.5468, "step": 205 }, { "epoch": 0.13, "learning_rate": 1.9989405409965585e-05, "loss": 0.5641, "step": 206 }, { "epoch": 0.13, "learning_rate": 1.9989081988474896e-05, "loss": 0.5559, "step": 207 }, { "epoch": 0.13, "learning_rate": 1.9988753707070675e-05, "loss": 0.5241, "step": 208 }, { "epoch": 0.13, "learning_rate": 1.9988420565912647e-05, "loss": 0.5582, "step": 209 }, { "epoch": 0.14, "learning_rate": 1.998808256516289e-05, "loss": 0.5595, "step": 210 }, { "epoch": 0.14, "learning_rate": 1.9987739704985846e-05, "loss": 0.5221, "step": 211 }, { "epoch": 0.14, "learning_rate": 1.9987391985548326e-05, "loss": 0.55, "step": 212 }, { "epoch": 0.14, "learning_rate": 1.9987039407019508e-05, "loss": 0.5469, "step": 213 }, { "epoch": 0.14, "learning_rate": 1.9986681969570924e-05, "loss": 0.5509, "step": 214 }, { "epoch": 0.14, "learning_rate": 1.9986319673376476e-05, "loss": 0.5538, "step": 215 }, { "epoch": 0.14, "learning_rate": 1.998595251861243e-05, "loss": 0.5178, "step": 216 }, { "epoch": 0.14, "learning_rate": 1.998558050545741e-05, "loss": 0.5523, "step": 217 }, { "epoch": 0.14, "learning_rate": 1.998520363409242e-05, "loss": 0.5648, "step": 218 }, { "epoch": 0.14, "learning_rate": 1.998482190470081e-05, "loss": 0.5409, "step": 219 }, { "epoch": 0.14, "learning_rate": 1.9984435317468298e-05, "loss": 0.5454, "step": 220 }, { "epoch": 0.14, "learning_rate": 1.998404387258297e-05, "loss": 0.508, "step": 221 }, { "epoch": 0.14, "learning_rate": 1.9983647570235274e-05, "loss": 0.5704, "step": 222 }, { "epoch": 0.14, "learning_rate": 1.9983246410618016e-05, "loss": 0.5333, "step": 223 }, { "epoch": 0.14, "learning_rate": 1.9982840393926374e-05, "loss": 0.5423, "step": 224 }, { "epoch": 0.15, "learning_rate": 1.998242952035788e-05, "loss": 0.5624, "step": 225 }, { "epoch": 0.15, "learning_rate": 1.9982013790112437e-05, "loss": 0.582, "step": 226 }, { "epoch": 0.15, "learning_rate": 1.9981593203392302e-05, "loss": 0.5619, "step": 227 }, { "epoch": 0.15, "learning_rate": 1.9981167760402104e-05, "loss": 0.5571, "step": 228 }, { "epoch": 0.15, "learning_rate": 1.9980737461348832e-05, "loss": 0.5536, "step": 229 }, { "epoch": 0.15, "learning_rate": 1.9980302306441834e-05, "loss": 0.5412, "step": 230 }, { "epoch": 0.15, "learning_rate": 1.997986229589282e-05, "loss": 0.5462, "step": 231 }, { "epoch": 0.15, "learning_rate": 1.997941742991587e-05, "loss": 0.523, "step": 232 }, { "epoch": 0.15, "learning_rate": 1.9978967708727418e-05, "loss": 0.5455, "step": 233 }, { "epoch": 0.15, "learning_rate": 1.997851313254626e-05, "loss": 0.5489, "step": 234 }, { "epoch": 0.15, "learning_rate": 1.997805370159357e-05, "loss": 0.5374, "step": 235 }, { "epoch": 0.15, "learning_rate": 1.997758941609286e-05, "loss": 0.5308, "step": 236 }, { "epoch": 0.15, "learning_rate": 1.9977120276270018e-05, "loss": 0.5383, "step": 237 }, { "epoch": 0.15, "learning_rate": 1.997664628235329e-05, "loss": 0.5378, "step": 238 }, { "epoch": 0.15, "learning_rate": 1.9976167434573293e-05, "loss": 0.533, "step": 239 }, { "epoch": 0.15, "learning_rate": 1.9975683733162987e-05, "loss": 0.564, "step": 240 }, { "epoch": 0.16, "learning_rate": 1.9975195178357706e-05, "loss": 0.5159, "step": 241 }, { "epoch": 0.16, "learning_rate": 1.9974701770395147e-05, "loss": 0.5318, "step": 242 }, { "epoch": 0.16, "learning_rate": 1.997420350951536e-05, "loss": 0.5453, "step": 243 }, { "epoch": 0.16, "learning_rate": 1.9973700395960765e-05, "loss": 0.5232, "step": 244 }, { "epoch": 0.16, "learning_rate": 1.9973192429976135e-05, "loss": 0.5186, "step": 245 }, { "epoch": 0.16, "learning_rate": 1.9972679611808603e-05, "loss": 0.5772, "step": 246 }, { "epoch": 0.16, "learning_rate": 1.997216194170767e-05, "loss": 0.5436, "step": 247 }, { "epoch": 0.16, "learning_rate": 1.9971639419925197e-05, "loss": 0.5426, "step": 248 }, { "epoch": 0.16, "learning_rate": 1.99711120467154e-05, "loss": 0.5514, "step": 249 }, { "epoch": 0.16, "learning_rate": 1.9970579822334856e-05, "loss": 0.5412, "step": 250 }, { "epoch": 0.16, "learning_rate": 1.997004274704251e-05, "loss": 0.5794, "step": 251 }, { "epoch": 0.16, "learning_rate": 1.9969500821099654e-05, "loss": 0.557, "step": 252 }, { "epoch": 0.16, "learning_rate": 1.996895404476995e-05, "loss": 0.545, "step": 253 }, { "epoch": 0.16, "learning_rate": 1.996840241831942e-05, "loss": 0.5505, "step": 254 }, { "epoch": 0.16, "learning_rate": 1.996784594201644e-05, "loss": 0.5327, "step": 255 }, { "epoch": 0.17, "learning_rate": 1.996728461613175e-05, "loss": 0.5485, "step": 256 }, { "epoch": 0.17, "learning_rate": 1.9966718440938443e-05, "loss": 0.5157, "step": 257 }, { "epoch": 0.17, "learning_rate": 1.996614741671198e-05, "loss": 0.5348, "step": 258 }, { "epoch": 0.17, "learning_rate": 1.996557154373018e-05, "loss": 0.5515, "step": 259 }, { "epoch": 0.17, "learning_rate": 1.996499082227321e-05, "loss": 0.5141, "step": 260 }, { "epoch": 0.17, "learning_rate": 1.9964405252623614e-05, "loss": 0.5236, "step": 261 }, { "epoch": 0.17, "learning_rate": 1.9963814835066274e-05, "loss": 0.5359, "step": 262 }, { "epoch": 0.17, "learning_rate": 1.9963219569888454e-05, "loss": 0.544, "step": 263 }, { "epoch": 0.17, "learning_rate": 1.996261945737975e-05, "loss": 0.5512, "step": 264 }, { "epoch": 0.17, "learning_rate": 1.996201449783214e-05, "loss": 0.5178, "step": 265 }, { "epoch": 0.17, "learning_rate": 1.9961404691539947e-05, "loss": 0.5453, "step": 266 }, { "epoch": 0.17, "learning_rate": 1.996079003879986e-05, "loss": 0.5194, "step": 267 }, { "epoch": 0.17, "learning_rate": 1.996017053991091e-05, "loss": 0.5614, "step": 268 }, { "epoch": 0.17, "learning_rate": 1.995954619517451e-05, "loss": 0.5558, "step": 269 }, { "epoch": 0.17, "learning_rate": 1.995891700489441e-05, "loss": 0.5551, "step": 270 }, { "epoch": 0.17, "learning_rate": 1.995828296937673e-05, "loss": 0.5212, "step": 271 }, { "epoch": 0.18, "learning_rate": 1.995764408892994e-05, "loss": 0.5499, "step": 272 }, { "epoch": 0.18, "learning_rate": 1.9957000363864872e-05, "loss": 0.5495, "step": 273 }, { "epoch": 0.18, "learning_rate": 1.9956351794494706e-05, "loss": 0.5249, "step": 274 }, { "epoch": 0.18, "learning_rate": 1.9955698381134994e-05, "loss": 0.5451, "step": 275 }, { "epoch": 0.18, "learning_rate": 1.995504012410363e-05, "loss": 0.5735, "step": 276 }, { "epoch": 0.18, "learning_rate": 1.9954377023720877e-05, "loss": 0.5498, "step": 277 }, { "epoch": 0.18, "learning_rate": 1.995370908030934e-05, "loss": 0.5494, "step": 278 }, { "epoch": 0.18, "learning_rate": 1.9953036294194003e-05, "loss": 0.5351, "step": 279 }, { "epoch": 0.18, "learning_rate": 1.995235866570217e-05, "loss": 0.5497, "step": 280 }, { "epoch": 0.18, "learning_rate": 1.9951676195163542e-05, "loss": 0.5328, "step": 281 }, { "epoch": 0.18, "learning_rate": 1.995098888291015e-05, "loss": 0.5339, "step": 282 }, { "epoch": 0.18, "learning_rate": 1.9950296729276384e-05, "loss": 0.5167, "step": 283 }, { "epoch": 0.18, "learning_rate": 1.9949599734598993e-05, "loss": 0.5296, "step": 284 }, { "epoch": 0.18, "learning_rate": 1.994889789921708e-05, "loss": 0.5455, "step": 285 }, { "epoch": 0.18, "learning_rate": 1.9948191223472108e-05, "loss": 0.5314, "step": 286 }, { "epoch": 0.19, "learning_rate": 1.994747970770789e-05, "loss": 0.5448, "step": 287 }, { "epoch": 0.19, "learning_rate": 1.994676335227059e-05, "loss": 0.5407, "step": 288 }, { "epoch": 0.19, "learning_rate": 1.9946042157508734e-05, "loss": 0.5241, "step": 289 }, { "epoch": 0.19, "learning_rate": 1.99453161237732e-05, "loss": 0.5222, "step": 290 }, { "epoch": 0.19, "learning_rate": 1.994458525141722e-05, "loss": 0.5473, "step": 291 }, { "epoch": 0.19, "learning_rate": 1.9943849540796375e-05, "loss": 0.5362, "step": 292 }, { "epoch": 0.19, "learning_rate": 1.9943108992268612e-05, "loss": 0.5086, "step": 293 }, { "epoch": 0.19, "learning_rate": 1.994236360619422e-05, "loss": 0.5345, "step": 294 }, { "epoch": 0.19, "learning_rate": 1.9941613382935848e-05, "loss": 0.5521, "step": 295 }, { "epoch": 0.19, "learning_rate": 1.9940858322858493e-05, "loss": 0.5317, "step": 296 }, { "epoch": 0.19, "learning_rate": 1.9940098426329518e-05, "loss": 0.5357, "step": 297 }, { "epoch": 0.19, "learning_rate": 1.993933369371862e-05, "loss": 0.5394, "step": 298 }, { "epoch": 0.19, "learning_rate": 1.9938564125397862e-05, "loss": 0.5611, "step": 299 }, { "epoch": 0.19, "learning_rate": 1.9937789721741654e-05, "loss": 0.5735, "step": 300 }, { "epoch": 0.19, "learning_rate": 1.9937010483126763e-05, "loss": 0.5204, "step": 301 }, { "epoch": 0.19, "learning_rate": 1.993622640993231e-05, "loss": 0.5674, "step": 302 }, { "epoch": 0.2, "learning_rate": 1.9935437502539762e-05, "loss": 0.5214, "step": 303 }, { "epoch": 0.2, "learning_rate": 1.9934643761332933e-05, "loss": 0.5282, "step": 304 }, { "epoch": 0.2, "learning_rate": 1.9933845186698007e-05, "loss": 0.5032, "step": 305 }, { "epoch": 0.2, "learning_rate": 1.9933041779023502e-05, "loss": 0.5617, "step": 306 }, { "epoch": 0.2, "learning_rate": 1.9932233538700295e-05, "loss": 0.5301, "step": 307 }, { "epoch": 0.2, "learning_rate": 1.9931420466121613e-05, "loss": 0.5412, "step": 308 }, { "epoch": 0.2, "learning_rate": 1.9930602561683035e-05, "loss": 0.513, "step": 309 }, { "epoch": 0.2, "learning_rate": 1.992977982578249e-05, "loss": 0.5272, "step": 310 }, { "epoch": 0.2, "learning_rate": 1.9928952258820255e-05, "loss": 0.5226, "step": 311 }, { "epoch": 0.2, "learning_rate": 1.9928119861198962e-05, "loss": 0.5489, "step": 312 }, { "epoch": 0.2, "learning_rate": 1.9927282633323596e-05, "loss": 0.5454, "step": 313 }, { "epoch": 0.2, "learning_rate": 1.992644057560148e-05, "loss": 0.518, "step": 314 }, { "epoch": 0.2, "learning_rate": 1.9925593688442296e-05, "loss": 0.5375, "step": 315 }, { "epoch": 0.2, "learning_rate": 1.9924741972258076e-05, "loss": 0.5294, "step": 316 }, { "epoch": 0.2, "learning_rate": 1.9923885427463193e-05, "loss": 0.5467, "step": 317 }, { "epoch": 0.21, "learning_rate": 1.9923024054474384e-05, "loss": 0.5252, "step": 318 }, { "epoch": 0.21, "learning_rate": 1.992215785371072e-05, "loss": 0.5316, "step": 319 }, { "epoch": 0.21, "learning_rate": 1.9921286825593632e-05, "loss": 0.5402, "step": 320 }, { "epoch": 0.21, "learning_rate": 1.992041097054689e-05, "loss": 0.5535, "step": 321 }, { "epoch": 0.21, "learning_rate": 1.9919530288996617e-05, "loss": 0.5292, "step": 322 }, { "epoch": 0.21, "learning_rate": 1.9918644781371292e-05, "loss": 0.5497, "step": 323 }, { "epoch": 0.21, "learning_rate": 1.9917754448101725e-05, "loss": 0.5495, "step": 324 }, { "epoch": 0.21, "learning_rate": 1.991685928962109e-05, "loss": 0.5325, "step": 325 }, { "epoch": 0.21, "learning_rate": 1.9915959306364897e-05, "loss": 0.5479, "step": 326 }, { "epoch": 0.21, "learning_rate": 1.991505449877101e-05, "loss": 0.5601, "step": 327 }, { "epoch": 0.21, "learning_rate": 1.9914144867279644e-05, "loss": 0.5369, "step": 328 }, { "epoch": 0.21, "learning_rate": 1.9913230412333348e-05, "loss": 0.5402, "step": 329 }, { "epoch": 0.21, "learning_rate": 1.9912311134377023e-05, "loss": 0.54, "step": 330 }, { "epoch": 0.21, "learning_rate": 1.9911387033857924e-05, "loss": 0.5231, "step": 331 }, { "epoch": 0.21, "learning_rate": 1.9910458111225645e-05, "loss": 0.5432, "step": 332 }, { "epoch": 0.21, "learning_rate": 1.9909524366932127e-05, "loss": 0.5065, "step": 333 }, { "epoch": 0.22, "learning_rate": 1.9908585801431658e-05, "loss": 0.5712, "step": 334 }, { "epoch": 0.22, "learning_rate": 1.9907642415180874e-05, "loss": 0.5608, "step": 335 }, { "epoch": 0.22, "learning_rate": 1.990669420863875e-05, "loss": 0.5315, "step": 336 }, { "epoch": 0.22, "learning_rate": 1.990574118226661e-05, "loss": 0.53, "step": 337 }, { "epoch": 0.22, "learning_rate": 1.9904783336528128e-05, "loss": 0.5429, "step": 338 }, { "epoch": 0.22, "learning_rate": 1.9903820671889308e-05, "loss": 0.5159, "step": 339 }, { "epoch": 0.22, "learning_rate": 1.9902853188818518e-05, "loss": 0.5351, "step": 340 }, { "epoch": 0.22, "learning_rate": 1.9901880887786455e-05, "loss": 0.5402, "step": 341 }, { "epoch": 0.22, "learning_rate": 1.9900903769266167e-05, "loss": 0.5427, "step": 342 }, { "epoch": 0.22, "learning_rate": 1.9899921833733042e-05, "loss": 0.5347, "step": 343 }, { "epoch": 0.22, "learning_rate": 1.9898935081664814e-05, "loss": 0.5458, "step": 344 }, { "epoch": 0.22, "learning_rate": 1.9897943513541564e-05, "loss": 0.5412, "step": 345 }, { "epoch": 0.22, "learning_rate": 1.9896947129845707e-05, "loss": 0.5266, "step": 346 }, { "epoch": 0.22, "learning_rate": 1.989594593106201e-05, "loss": 0.5314, "step": 347 }, { "epoch": 0.22, "learning_rate": 1.9894939917677577e-05, "loss": 0.5732, "step": 348 }, { "epoch": 0.23, "learning_rate": 1.989392909018186e-05, "loss": 0.5612, "step": 349 }, { "epoch": 0.23, "learning_rate": 1.9892913449066643e-05, "loss": 0.5427, "step": 350 }, { "epoch": 0.23, "learning_rate": 1.989189299482606e-05, "loss": 0.5552, "step": 351 }, { "epoch": 0.23, "learning_rate": 1.9890867727956587e-05, "loss": 0.5122, "step": 352 }, { "epoch": 0.23, "learning_rate": 1.988983764895704e-05, "loss": 0.5284, "step": 353 }, { "epoch": 0.23, "learning_rate": 1.9888802758328574e-05, "loss": 0.5401, "step": 354 }, { "epoch": 0.23, "learning_rate": 1.9887763056574688e-05, "loss": 0.5387, "step": 355 }, { "epoch": 0.23, "learning_rate": 1.988671854420122e-05, "loss": 0.5396, "step": 356 }, { "epoch": 0.23, "learning_rate": 1.9885669221716343e-05, "loss": 0.5576, "step": 357 }, { "epoch": 0.23, "learning_rate": 1.9884615089630584e-05, "loss": 0.5345, "step": 358 }, { "epoch": 0.23, "learning_rate": 1.9883556148456802e-05, "loss": 0.5508, "step": 359 }, { "epoch": 0.23, "learning_rate": 1.9882492398710192e-05, "loss": 0.5368, "step": 360 }, { "epoch": 0.23, "learning_rate": 1.988142384090829e-05, "loss": 0.513, "step": 361 }, { "epoch": 0.23, "learning_rate": 1.988035047557098e-05, "loss": 0.5137, "step": 362 }, { "epoch": 0.23, "learning_rate": 1.9879272303220473e-05, "loss": 0.5366, "step": 363 }, { "epoch": 0.24, "learning_rate": 1.987818932438133e-05, "loss": 0.5329, "step": 364 }, { "epoch": 0.24, "learning_rate": 1.9877101539580437e-05, "loss": 0.5417, "step": 365 }, { "epoch": 0.24, "learning_rate": 1.987600894934703e-05, "loss": 0.527, "step": 366 }, { "epoch": 0.24, "learning_rate": 1.987491155421268e-05, "loss": 0.5317, "step": 367 }, { "epoch": 0.24, "learning_rate": 1.987380935471129e-05, "loss": 0.5264, "step": 368 }, { "epoch": 0.24, "learning_rate": 1.9872702351379108e-05, "loss": 0.5193, "step": 369 }, { "epoch": 0.24, "learning_rate": 1.987159054475472e-05, "loss": 0.5315, "step": 370 }, { "epoch": 0.24, "learning_rate": 1.9870473935379038e-05, "loss": 0.5466, "step": 371 }, { "epoch": 0.24, "learning_rate": 1.986935252379532e-05, "loss": 0.5237, "step": 372 }, { "epoch": 0.24, "learning_rate": 1.986822631054916e-05, "loss": 0.5294, "step": 373 }, { "epoch": 0.24, "learning_rate": 1.9867095296188483e-05, "loss": 0.4956, "step": 374 }, { "epoch": 0.24, "learning_rate": 1.986595948126356e-05, "loss": 0.5384, "step": 375 }, { "epoch": 0.24, "learning_rate": 1.9864818866326978e-05, "loss": 0.5612, "step": 376 }, { "epoch": 0.24, "learning_rate": 1.9863673451933684e-05, "loss": 0.5131, "step": 377 }, { "epoch": 0.24, "learning_rate": 1.986252323864094e-05, "loss": 0.5381, "step": 378 }, { "epoch": 0.24, "learning_rate": 1.9861368227008352e-05, "loss": 0.5267, "step": 379 }, { "epoch": 0.25, "learning_rate": 1.9860208417597863e-05, "loss": 0.5225, "step": 380 }, { "epoch": 0.25, "learning_rate": 1.9859043810973747e-05, "loss": 0.5399, "step": 381 }, { "epoch": 0.25, "learning_rate": 1.9857874407702606e-05, "loss": 0.5378, "step": 382 }, { "epoch": 0.25, "learning_rate": 1.9856700208353384e-05, "loss": 0.5491, "step": 383 }, { "epoch": 0.25, "learning_rate": 1.9855521213497355e-05, "loss": 0.5325, "step": 384 }, { "epoch": 0.25, "learning_rate": 1.9854337423708125e-05, "loss": 0.5023, "step": 385 }, { "epoch": 0.25, "learning_rate": 1.9853148839561638e-05, "loss": 0.5401, "step": 386 }, { "epoch": 0.25, "learning_rate": 1.9851955461636164e-05, "loss": 0.5376, "step": 387 }, { "epoch": 0.25, "learning_rate": 1.9850757290512313e-05, "loss": 0.5657, "step": 388 }, { "epoch": 0.25, "learning_rate": 1.9849554326773013e-05, "loss": 0.538, "step": 389 }, { "epoch": 0.25, "learning_rate": 1.984834657100354e-05, "loss": 0.5383, "step": 390 }, { "epoch": 0.25, "learning_rate": 1.9847134023791497e-05, "loss": 0.5422, "step": 391 }, { "epoch": 0.25, "learning_rate": 1.9845916685726808e-05, "loss": 0.5054, "step": 392 }, { "epoch": 0.25, "learning_rate": 1.984469455740174e-05, "loss": 0.5496, "step": 393 }, { "epoch": 0.25, "learning_rate": 1.9843467639410885e-05, "loss": 0.5269, "step": 394 }, { "epoch": 0.26, "learning_rate": 1.9842235932351164e-05, "loss": 0.5169, "step": 395 }, { "epoch": 0.26, "learning_rate": 1.9840999436821836e-05, "loss": 0.5428, "step": 396 }, { "epoch": 0.26, "learning_rate": 1.9839758153424478e-05, "loss": 0.5226, "step": 397 }, { "epoch": 0.26, "learning_rate": 1.9838512082763002e-05, "loss": 0.5509, "step": 398 }, { "epoch": 0.26, "learning_rate": 1.9837261225443657e-05, "loss": 0.5088, "step": 399 }, { "epoch": 0.26, "learning_rate": 1.983600558207501e-05, "loss": 0.5259, "step": 400 }, { "epoch": 0.26, "learning_rate": 1.9834745153267954e-05, "loss": 0.5034, "step": 401 }, { "epoch": 0.26, "learning_rate": 1.9833479939635724e-05, "loss": 0.5523, "step": 402 }, { "epoch": 0.26, "learning_rate": 1.9832209941793865e-05, "loss": 0.5178, "step": 403 }, { "epoch": 0.26, "learning_rate": 1.983093516036027e-05, "loss": 0.5325, "step": 404 }, { "epoch": 0.26, "learning_rate": 1.9829655595955144e-05, "loss": 0.5085, "step": 405 }, { "epoch": 0.26, "learning_rate": 1.9828371249201025e-05, "loss": 0.5554, "step": 406 }, { "epoch": 0.26, "learning_rate": 1.9827082120722774e-05, "loss": 0.5494, "step": 407 }, { "epoch": 0.26, "learning_rate": 1.9825788211147587e-05, "loss": 0.5123, "step": 408 }, { "epoch": 0.26, "learning_rate": 1.9824489521104975e-05, "loss": 0.5323, "step": 409 }, { "epoch": 0.26, "learning_rate": 1.9823186051226783e-05, "loss": 0.5482, "step": 410 }, { "epoch": 0.27, "learning_rate": 1.982187780214717e-05, "loss": 0.5365, "step": 411 }, { "epoch": 0.27, "learning_rate": 1.9820564774502644e-05, "loss": 0.5223, "step": 412 }, { "epoch": 0.27, "learning_rate": 1.9819246968932007e-05, "loss": 0.5216, "step": 413 }, { "epoch": 0.27, "learning_rate": 1.9817924386076416e-05, "loss": 0.5343, "step": 414 }, { "epoch": 0.27, "learning_rate": 1.9816597026579326e-05, "loss": 0.5283, "step": 415 }, { "epoch": 0.27, "learning_rate": 1.981526489108653e-05, "loss": 0.5313, "step": 416 }, { "epoch": 0.27, "learning_rate": 1.981392798024614e-05, "loss": 0.5116, "step": 417 }, { "epoch": 0.27, "learning_rate": 1.98125862947086e-05, "loss": 0.526, "step": 418 }, { "epoch": 0.27, "learning_rate": 1.9811239835126667e-05, "loss": 0.5288, "step": 419 }, { "epoch": 0.27, "learning_rate": 1.980988860215542e-05, "loss": 0.5423, "step": 420 }, { "epoch": 0.27, "learning_rate": 1.980853259645227e-05, "loss": 0.5465, "step": 421 }, { "epoch": 0.27, "learning_rate": 1.9807171818676944e-05, "loss": 0.5568, "step": 422 }, { "epoch": 0.27, "learning_rate": 1.9805806269491487e-05, "loss": 0.5276, "step": 423 }, { "epoch": 0.27, "learning_rate": 1.980443594956027e-05, "loss": 0.5368, "step": 424 }, { "epoch": 0.27, "learning_rate": 1.9803060859549988e-05, "loss": 0.52, "step": 425 }, { "epoch": 0.28, "learning_rate": 1.9801681000129652e-05, "loss": 0.5274, "step": 426 }, { "epoch": 0.28, "learning_rate": 1.980029637197059e-05, "loss": 0.5366, "step": 427 }, { "epoch": 0.28, "learning_rate": 1.9798906975746462e-05, "loss": 0.5499, "step": 428 }, { "epoch": 0.28, "learning_rate": 1.9797512812133236e-05, "loss": 0.5214, "step": 429 }, { "epoch": 0.28, "learning_rate": 1.9796113881809207e-05, "loss": 0.5087, "step": 430 }, { "epoch": 0.28, "learning_rate": 1.9794710185454983e-05, "loss": 0.5426, "step": 431 }, { "epoch": 0.28, "learning_rate": 1.9793301723753494e-05, "loss": 0.5256, "step": 432 }, { "epoch": 0.28, "learning_rate": 1.979188849738999e-05, "loss": 0.5391, "step": 433 }, { "epoch": 0.28, "learning_rate": 1.9790470507052043e-05, "loss": 0.5295, "step": 434 }, { "epoch": 0.28, "learning_rate": 1.9789047753429527e-05, "loss": 0.531, "step": 435 }, { "epoch": 0.28, "learning_rate": 1.9787620237214648e-05, "loss": 0.5447, "step": 436 }, { "epoch": 0.28, "learning_rate": 1.9786187959101926e-05, "loss": 0.5294, "step": 437 }, { "epoch": 0.28, "learning_rate": 1.9784750919788192e-05, "loss": 0.5483, "step": 438 }, { "epoch": 0.28, "learning_rate": 1.9783309119972607e-05, "loss": 0.5397, "step": 439 }, { "epoch": 0.28, "learning_rate": 1.9781862560356632e-05, "loss": 0.5325, "step": 440 }, { "epoch": 0.28, "learning_rate": 1.9780411241644054e-05, "loss": 0.501, "step": 441 }, { "epoch": 0.29, "learning_rate": 1.9778955164540966e-05, "loss": 0.5103, "step": 442 }, { "epoch": 0.29, "learning_rate": 1.977749432975579e-05, "loss": 0.5448, "step": 443 }, { "epoch": 0.29, "learning_rate": 1.9776028737999256e-05, "loss": 0.541, "step": 444 }, { "epoch": 0.29, "learning_rate": 1.9774558389984398e-05, "loss": 0.5259, "step": 445 }, { "epoch": 0.29, "learning_rate": 1.977308328642658e-05, "loss": 0.5289, "step": 446 }, { "epoch": 0.29, "learning_rate": 1.9771603428043476e-05, "loss": 0.5693, "step": 447 }, { "epoch": 0.29, "learning_rate": 1.9770118815555063e-05, "loss": 0.5139, "step": 448 }, { "epoch": 0.29, "learning_rate": 1.9768629449683644e-05, "loss": 0.5041, "step": 449 }, { "epoch": 0.29, "learning_rate": 1.9767135331153827e-05, "loss": 0.5147, "step": 450 }, { "epoch": 0.29, "learning_rate": 1.9765636460692534e-05, "loss": 0.5157, "step": 451 }, { "epoch": 0.29, "learning_rate": 1.9764132839029e-05, "loss": 0.5483, "step": 452 }, { "epoch": 0.29, "learning_rate": 1.9762624466894767e-05, "loss": 0.5297, "step": 453 }, { "epoch": 0.29, "learning_rate": 1.9761111345023702e-05, "loss": 0.5424, "step": 454 }, { "epoch": 0.29, "learning_rate": 1.9759593474151965e-05, "loss": 0.5515, "step": 455 }, { "epoch": 0.29, "learning_rate": 1.9758070855018033e-05, "loss": 0.5214, "step": 456 }, { "epoch": 0.3, "learning_rate": 1.97565434883627e-05, "loss": 0.5137, "step": 457 }, { "epoch": 0.3, "learning_rate": 1.975501137492906e-05, "loss": 0.5408, "step": 458 }, { "epoch": 0.3, "learning_rate": 1.9753474515462526e-05, "loss": 0.5507, "step": 459 }, { "epoch": 0.3, "learning_rate": 1.9751932910710808e-05, "loss": 0.5226, "step": 460 }, { "epoch": 0.3, "learning_rate": 1.9750386561423935e-05, "loss": 0.551, "step": 461 }, { "epoch": 0.3, "learning_rate": 1.9748835468354243e-05, "loss": 0.5423, "step": 462 }, { "epoch": 0.3, "learning_rate": 1.974727963225637e-05, "loss": 0.5084, "step": 463 }, { "epoch": 0.3, "learning_rate": 1.9745719053887265e-05, "loss": 0.5031, "step": 464 }, { "epoch": 0.3, "learning_rate": 1.9744153734006187e-05, "loss": 0.5297, "step": 465 }, { "epoch": 0.3, "learning_rate": 1.97425836733747e-05, "loss": 0.5029, "step": 466 }, { "epoch": 0.3, "learning_rate": 1.974100887275667e-05, "loss": 0.5294, "step": 467 }, { "epoch": 0.3, "learning_rate": 1.9739429332918276e-05, "loss": 0.528, "step": 468 }, { "epoch": 0.3, "learning_rate": 1.9737845054627996e-05, "loss": 0.5551, "step": 469 }, { "epoch": 0.3, "learning_rate": 1.9736256038656624e-05, "loss": 0.5375, "step": 470 }, { "epoch": 0.3, "learning_rate": 1.973466228577725e-05, "loss": 0.5556, "step": 471 }, { "epoch": 0.3, "learning_rate": 1.9733063796765267e-05, "loss": 0.5253, "step": 472 }, { "epoch": 0.31, "learning_rate": 1.9731460572398377e-05, "loss": 0.5209, "step": 473 }, { "epoch": 0.31, "learning_rate": 1.9729852613456586e-05, "loss": 0.5211, "step": 474 }, { "epoch": 0.31, "learning_rate": 1.9728239920722207e-05, "loss": 0.5268, "step": 475 }, { "epoch": 0.31, "learning_rate": 1.972662249497984e-05, "loss": 0.4986, "step": 476 }, { "epoch": 0.31, "learning_rate": 1.9725000337016407e-05, "loss": 0.5404, "step": 477 }, { "epoch": 0.31, "learning_rate": 1.9723373447621125e-05, "loss": 0.5192, "step": 478 }, { "epoch": 0.31, "learning_rate": 1.9721741827585505e-05, "loss": 0.5154, "step": 479 }, { "epoch": 0.31, "learning_rate": 1.972010547770338e-05, "loss": 0.5226, "step": 480 }, { "epoch": 0.31, "learning_rate": 1.9718464398770856e-05, "loss": 0.5242, "step": 481 }, { "epoch": 0.31, "learning_rate": 1.9716818591586367e-05, "loss": 0.5179, "step": 482 }, { "epoch": 0.31, "learning_rate": 1.9715168056950626e-05, "loss": 0.5344, "step": 483 }, { "epoch": 0.31, "learning_rate": 1.9713512795666663e-05, "loss": 0.5218, "step": 484 }, { "epoch": 0.31, "learning_rate": 1.9711852808539795e-05, "loss": 0.5335, "step": 485 }, { "epoch": 0.31, "learning_rate": 1.9710188096377645e-05, "loss": 0.5395, "step": 486 }, { "epoch": 0.31, "learning_rate": 1.970851865999013e-05, "loss": 0.5522, "step": 487 }, { "epoch": 0.32, "learning_rate": 1.970684450018948e-05, "loss": 0.5229, "step": 488 }, { "epoch": 0.32, "learning_rate": 1.9705165617790195e-05, "loss": 0.4993, "step": 489 }, { "epoch": 0.32, "learning_rate": 1.9703482013609098e-05, "loss": 0.5366, "step": 490 }, { "epoch": 0.32, "learning_rate": 1.9701793688465302e-05, "loss": 0.543, "step": 491 }, { "epoch": 0.32, "learning_rate": 1.9700100643180213e-05, "loss": 0.5345, "step": 492 }, { "epoch": 0.32, "learning_rate": 1.9698402878577528e-05, "loss": 0.5308, "step": 493 }, { "epoch": 0.32, "learning_rate": 1.969670039548326e-05, "loss": 0.5218, "step": 494 }, { "epoch": 0.32, "learning_rate": 1.9694993194725703e-05, "loss": 0.5147, "step": 495 }, { "epoch": 0.32, "learning_rate": 1.969328127713544e-05, "loss": 0.5315, "step": 496 }, { "epoch": 0.32, "learning_rate": 1.969156464354537e-05, "loss": 0.5246, "step": 497 }, { "epoch": 0.32, "learning_rate": 1.968984329479066e-05, "loss": 0.5251, "step": 498 }, { "epoch": 0.32, "learning_rate": 1.9688117231708795e-05, "loss": 0.5446, "step": 499 }, { "epoch": 0.32, "learning_rate": 1.9686386455139544e-05, "loss": 0.552, "step": 500 }, { "epoch": 0.32, "learning_rate": 1.968465096592496e-05, "loss": 0.5372, "step": 501 }, { "epoch": 0.32, "learning_rate": 1.9682910764909405e-05, "loss": 0.5087, "step": 502 }, { "epoch": 0.32, "learning_rate": 1.9681165852939526e-05, "loss": 0.5324, "step": 503 }, { "epoch": 0.33, "learning_rate": 1.9679416230864265e-05, "loss": 0.5375, "step": 504 }, { "epoch": 0.33, "learning_rate": 1.9677661899534837e-05, "loss": 0.532, "step": 505 }, { "epoch": 0.33, "learning_rate": 1.9675902859804786e-05, "loss": 0.5115, "step": 506 }, { "epoch": 0.33, "learning_rate": 1.9674139112529908e-05, "loss": 0.5388, "step": 507 }, { "epoch": 0.33, "learning_rate": 1.9672370658568306e-05, "loss": 0.5271, "step": 508 }, { "epoch": 0.33, "learning_rate": 1.9670597498780385e-05, "loss": 0.5243, "step": 509 }, { "epoch": 0.33, "learning_rate": 1.9668819634028816e-05, "loss": 0.527, "step": 510 }, { "epoch": 0.33, "learning_rate": 1.9667037065178577e-05, "loss": 0.5067, "step": 511 }, { "epoch": 0.33, "learning_rate": 1.966524979309692e-05, "loss": 0.5406, "step": 512 }, { "epoch": 0.33, "learning_rate": 1.96634578186534e-05, "loss": 0.5242, "step": 513 }, { "epoch": 0.33, "learning_rate": 1.9661661142719856e-05, "loss": 0.5377, "step": 514 }, { "epoch": 0.33, "learning_rate": 1.96598597661704e-05, "loss": 0.5251, "step": 515 }, { "epoch": 0.33, "learning_rate": 1.9658053689881453e-05, "loss": 0.524, "step": 516 }, { "epoch": 0.33, "learning_rate": 1.9656242914731708e-05, "loss": 0.5096, "step": 517 }, { "epoch": 0.33, "learning_rate": 1.9654427441602145e-05, "loss": 0.4962, "step": 518 }, { "epoch": 0.34, "learning_rate": 1.9652607271376036e-05, "loss": 0.5179, "step": 519 }, { "epoch": 0.34, "learning_rate": 1.9650782404938933e-05, "loss": 0.5297, "step": 520 }, { "epoch": 0.34, "learning_rate": 1.9648952843178676e-05, "loss": 0.5068, "step": 521 }, { "epoch": 0.34, "learning_rate": 1.964711858698539e-05, "loss": 0.5411, "step": 522 }, { "epoch": 0.34, "learning_rate": 1.9645279637251474e-05, "loss": 0.529, "step": 523 }, { "epoch": 0.34, "learning_rate": 1.9643435994871626e-05, "loss": 0.5522, "step": 524 }, { "epoch": 0.34, "learning_rate": 1.9641587660742822e-05, "loss": 0.5147, "step": 525 }, { "epoch": 0.34, "learning_rate": 1.963973463576431e-05, "loss": 0.5814, "step": 526 }, { "epoch": 0.34, "learning_rate": 1.9637876920837633e-05, "loss": 0.5385, "step": 527 }, { "epoch": 0.34, "learning_rate": 1.963601451686661e-05, "loss": 0.5507, "step": 528 }, { "epoch": 0.34, "learning_rate": 1.9634147424757347e-05, "loss": 0.5506, "step": 529 }, { "epoch": 0.34, "learning_rate": 1.9632275645418218e-05, "loss": 0.5117, "step": 530 }, { "epoch": 0.34, "learning_rate": 1.9630399179759888e-05, "loss": 0.5417, "step": 531 }, { "epoch": 0.34, "learning_rate": 1.9628518028695307e-05, "loss": 0.5435, "step": 532 }, { "epoch": 0.34, "learning_rate": 1.962663219313969e-05, "loss": 0.5438, "step": 533 }, { "epoch": 0.34, "learning_rate": 1.9624741674010544e-05, "loss": 0.5227, "step": 534 }, { "epoch": 0.35, "learning_rate": 1.962284647222764e-05, "loss": 0.5473, "step": 535 }, { "epoch": 0.35, "learning_rate": 1.9620946588713048e-05, "loss": 0.5534, "step": 536 }, { "epoch": 0.35, "learning_rate": 1.9619042024391098e-05, "loss": 0.5183, "step": 537 }, { "epoch": 0.35, "learning_rate": 1.96171327801884e-05, "loss": 0.5088, "step": 538 }, { "epoch": 0.35, "learning_rate": 1.9615218857033852e-05, "loss": 0.4927, "step": 539 }, { "epoch": 0.35, "learning_rate": 1.9613300255858615e-05, "loss": 0.5001, "step": 540 }, { "epoch": 0.35, "learning_rate": 1.9611376977596138e-05, "loss": 0.5262, "step": 541 }, { "epoch": 0.35, "learning_rate": 1.9609449023182133e-05, "loss": 0.5534, "step": 542 }, { "epoch": 0.35, "learning_rate": 1.960751639355459e-05, "loss": 0.5526, "step": 543 }, { "epoch": 0.35, "learning_rate": 1.960557908965379e-05, "loss": 0.5411, "step": 544 }, { "epoch": 0.35, "learning_rate": 1.9603637112422263e-05, "loss": 0.5257, "step": 545 }, { "epoch": 0.35, "learning_rate": 1.960169046280483e-05, "loss": 0.5081, "step": 546 }, { "epoch": 0.35, "learning_rate": 1.959973914174858e-05, "loss": 0.5143, "step": 547 }, { "epoch": 0.35, "learning_rate": 1.9597783150202873e-05, "loss": 0.5431, "step": 548 }, { "epoch": 0.35, "learning_rate": 1.959582248911934e-05, "loss": 0.524, "step": 549 }, { "epoch": 0.36, "learning_rate": 1.9593857159451897e-05, "loss": 0.5435, "step": 550 }, { "epoch": 0.36, "learning_rate": 1.959188716215671e-05, "loss": 0.5164, "step": 551 }, { "epoch": 0.36, "learning_rate": 1.9589912498192233e-05, "loss": 0.5172, "step": 552 }, { "epoch": 0.36, "learning_rate": 1.9587933168519183e-05, "loss": 0.5333, "step": 553 }, { "epoch": 0.36, "learning_rate": 1.958594917410055e-05, "loss": 0.5105, "step": 554 }, { "epoch": 0.36, "learning_rate": 1.9583960515901586e-05, "loss": 0.5283, "step": 555 }, { "epoch": 0.36, "learning_rate": 1.9581967194889826e-05, "loss": 0.5295, "step": 556 }, { "epoch": 0.36, "learning_rate": 1.957996921203506e-05, "loss": 0.5359, "step": 557 }, { "epoch": 0.36, "learning_rate": 1.9577966568309358e-05, "loss": 0.5194, "step": 558 }, { "epoch": 0.36, "learning_rate": 1.9575959264687047e-05, "loss": 0.522, "step": 559 }, { "epoch": 0.36, "learning_rate": 1.957394730214472e-05, "loss": 0.5252, "step": 560 }, { "epoch": 0.36, "learning_rate": 1.9571930681661254e-05, "loss": 0.5251, "step": 561 }, { "epoch": 0.36, "learning_rate": 1.956990940421777e-05, "loss": 0.5317, "step": 562 }, { "epoch": 0.36, "learning_rate": 1.9567883470797673e-05, "loss": 0.5112, "step": 563 }, { "epoch": 0.36, "learning_rate": 1.956585288238662e-05, "loss": 0.5459, "step": 564 }, { "epoch": 0.36, "learning_rate": 1.9563817639972535e-05, "loss": 0.5297, "step": 565 }, { "epoch": 0.37, "learning_rate": 1.9561777744545616e-05, "loss": 0.551, "step": 566 }, { "epoch": 0.37, "learning_rate": 1.955973319709832e-05, "loss": 0.5234, "step": 567 }, { "epoch": 0.37, "learning_rate": 1.955768399862536e-05, "loss": 0.5151, "step": 568 }, { "epoch": 0.37, "learning_rate": 1.9555630150123717e-05, "loss": 0.527, "step": 569 }, { "epoch": 0.37, "learning_rate": 1.9553571652592637e-05, "loss": 0.5268, "step": 570 }, { "epoch": 0.37, "learning_rate": 1.9551508507033623e-05, "loss": 0.5035, "step": 571 }, { "epoch": 0.37, "learning_rate": 1.9549440714450447e-05, "loss": 0.5098, "step": 572 }, { "epoch": 0.37, "learning_rate": 1.9547368275849128e-05, "loss": 0.5506, "step": 573 }, { "epoch": 0.37, "learning_rate": 1.9545291192237962e-05, "loss": 0.4807, "step": 574 }, { "epoch": 0.37, "learning_rate": 1.9543209464627493e-05, "loss": 0.5326, "step": 575 }, { "epoch": 0.37, "learning_rate": 1.9541123094030528e-05, "loss": 0.5183, "step": 576 }, { "epoch": 0.37, "learning_rate": 1.953903208146214e-05, "loss": 0.5152, "step": 577 }, { "epoch": 0.37, "learning_rate": 1.9536936427939647e-05, "loss": 0.5414, "step": 578 }, { "epoch": 0.37, "learning_rate": 1.953483613448263e-05, "loss": 0.5182, "step": 579 }, { "epoch": 0.37, "learning_rate": 1.9532731202112935e-05, "loss": 0.532, "step": 580 }, { "epoch": 0.38, "learning_rate": 1.953062163185466e-05, "loss": 0.5202, "step": 581 }, { "epoch": 0.38, "learning_rate": 1.9528507424734148e-05, "loss": 0.4976, "step": 582 }, { "epoch": 0.38, "learning_rate": 1.9526388581780016e-05, "loss": 0.5149, "step": 583 }, { "epoch": 0.38, "learning_rate": 1.9524265104023133e-05, "loss": 0.5282, "step": 584 }, { "epoch": 0.38, "learning_rate": 1.952213699249661e-05, "loss": 0.5355, "step": 585 }, { "epoch": 0.38, "learning_rate": 1.9520004248235826e-05, "loss": 0.5371, "step": 586 }, { "epoch": 0.38, "learning_rate": 1.951786687227841e-05, "loss": 0.5383, "step": 587 }, { "epoch": 0.38, "learning_rate": 1.9515724865664242e-05, "loss": 0.5186, "step": 588 }, { "epoch": 0.38, "learning_rate": 1.9513578229435454e-05, "loss": 0.5302, "step": 589 }, { "epoch": 0.38, "learning_rate": 1.9511426964636437e-05, "loss": 0.4952, "step": 590 }, { "epoch": 0.38, "learning_rate": 1.9509271072313826e-05, "loss": 0.5334, "step": 591 }, { "epoch": 0.38, "learning_rate": 1.9507110553516518e-05, "loss": 0.5343, "step": 592 }, { "epoch": 0.38, "learning_rate": 1.9504945409295644e-05, "loss": 0.4968, "step": 593 }, { "epoch": 0.38, "learning_rate": 1.9502775640704606e-05, "loss": 0.4991, "step": 594 }, { "epoch": 0.38, "learning_rate": 1.9500601248799035e-05, "loss": 0.5088, "step": 595 }, { "epoch": 0.38, "learning_rate": 1.949842223463683e-05, "loss": 0.5173, "step": 596 }, { "epoch": 0.39, "learning_rate": 1.949623859927813e-05, "loss": 0.5067, "step": 597 }, { "epoch": 0.39, "learning_rate": 1.9494050343785317e-05, "loss": 0.5392, "step": 598 }, { "epoch": 0.39, "learning_rate": 1.9491857469223027e-05, "loss": 0.5193, "step": 599 }, { "epoch": 0.39, "learning_rate": 1.9489659976658152e-05, "loss": 0.5424, "step": 600 }, { "epoch": 0.39, "learning_rate": 1.948745786715981e-05, "loss": 0.5283, "step": 601 }, { "epoch": 0.39, "learning_rate": 1.9485251141799387e-05, "loss": 0.5349, "step": 602 }, { "epoch": 0.39, "learning_rate": 1.9483039801650494e-05, "loss": 0.5439, "step": 603 }, { "epoch": 0.39, "learning_rate": 1.9480823847789007e-05, "loss": 0.5412, "step": 604 }, { "epoch": 0.39, "learning_rate": 1.9478603281293037e-05, "loss": 0.5059, "step": 605 }, { "epoch": 0.39, "learning_rate": 1.9476378103242934e-05, "loss": 0.5328, "step": 606 }, { "epoch": 0.39, "learning_rate": 1.94741483147213e-05, "loss": 0.5072, "step": 607 }, { "epoch": 0.39, "learning_rate": 1.947191391681298e-05, "loss": 0.5313, "step": 608 }, { "epoch": 0.39, "learning_rate": 1.9469674910605053e-05, "loss": 0.5252, "step": 609 }, { "epoch": 0.39, "learning_rate": 1.946743129718685e-05, "loss": 0.4906, "step": 610 }, { "epoch": 0.39, "learning_rate": 1.9465183077649938e-05, "loss": 0.5487, "step": 611 }, { "epoch": 0.4, "learning_rate": 1.946293025308813e-05, "loss": 0.556, "step": 612 }, { "epoch": 0.4, "learning_rate": 1.9460672824597472e-05, "loss": 0.5563, "step": 613 }, { "epoch": 0.4, "learning_rate": 1.9458410793276256e-05, "loss": 0.5186, "step": 614 }, { "epoch": 0.4, "learning_rate": 1.945614416022501e-05, "loss": 0.5315, "step": 615 }, { "epoch": 0.4, "learning_rate": 1.9453872926546505e-05, "loss": 0.527, "step": 616 }, { "epoch": 0.4, "learning_rate": 1.945159709334574e-05, "loss": 0.5314, "step": 617 }, { "epoch": 0.4, "learning_rate": 1.944931666172997e-05, "loss": 0.5245, "step": 618 }, { "epoch": 0.4, "learning_rate": 1.9447031632808662e-05, "loss": 0.5349, "step": 619 }, { "epoch": 0.4, "learning_rate": 1.944474200769355e-05, "loss": 0.5257, "step": 620 }, { "epoch": 0.4, "learning_rate": 1.944244778749858e-05, "loss": 0.5106, "step": 621 }, { "epoch": 0.4, "learning_rate": 1.9440148973339937e-05, "loss": 0.5034, "step": 622 }, { "epoch": 0.4, "learning_rate": 1.9437845566336056e-05, "loss": 0.5187, "step": 623 }, { "epoch": 0.4, "learning_rate": 1.943553756760759e-05, "loss": 0.5316, "step": 624 }, { "epoch": 0.4, "learning_rate": 1.9433224978277437e-05, "loss": 0.5229, "step": 625 }, { "epoch": 0.4, "learning_rate": 1.943090779947072e-05, "loss": 0.56, "step": 626 }, { "epoch": 0.4, "learning_rate": 1.94285860323148e-05, "loss": 0.5181, "step": 627 }, { "epoch": 0.41, "learning_rate": 1.9426259677939264e-05, "loss": 0.5406, "step": 628 }, { "epoch": 0.41, "learning_rate": 1.9423928737475946e-05, "loss": 0.546, "step": 629 }, { "epoch": 0.41, "learning_rate": 1.9421593212058894e-05, "loss": 0.501, "step": 630 }, { "epoch": 0.41, "learning_rate": 1.9419253102824393e-05, "loss": 0.5548, "step": 631 }, { "epoch": 0.41, "learning_rate": 1.9416908410910965e-05, "loss": 0.5203, "step": 632 }, { "epoch": 0.41, "learning_rate": 1.941455913745935e-05, "loss": 0.5015, "step": 633 }, { "epoch": 0.41, "learning_rate": 1.9412205283612527e-05, "loss": 0.5095, "step": 634 }, { "epoch": 0.41, "learning_rate": 1.9409846850515694e-05, "loss": 0.507, "step": 635 }, { "epoch": 0.41, "learning_rate": 1.9407483839316284e-05, "loss": 0.5094, "step": 636 }, { "epoch": 0.41, "learning_rate": 1.9405116251163956e-05, "loss": 0.5266, "step": 637 }, { "epoch": 0.41, "learning_rate": 1.9402744087210594e-05, "loss": 0.519, "step": 638 }, { "epoch": 0.41, "learning_rate": 1.940036734861031e-05, "loss": 0.5462, "step": 639 }, { "epoch": 0.41, "learning_rate": 1.939798603651944e-05, "loss": 0.5288, "step": 640 }, { "epoch": 0.41, "learning_rate": 1.9395600152096547e-05, "loss": 0.5235, "step": 641 }, { "epoch": 0.41, "learning_rate": 1.9393209696502416e-05, "loss": 0.5068, "step": 642 }, { "epoch": 0.42, "learning_rate": 1.939081467090006e-05, "loss": 0.5055, "step": 643 }, { "epoch": 0.42, "learning_rate": 1.938841507645471e-05, "loss": 0.4918, "step": 644 }, { "epoch": 0.42, "learning_rate": 1.9386010914333826e-05, "loss": 0.5203, "step": 645 }, { "epoch": 0.42, "learning_rate": 1.9383602185707082e-05, "loss": 0.5184, "step": 646 }, { "epoch": 0.42, "learning_rate": 1.9381188891746384e-05, "loss": 0.5326, "step": 647 }, { "epoch": 0.42, "learning_rate": 1.9378771033625855e-05, "loss": 0.5026, "step": 648 }, { "epoch": 0.42, "learning_rate": 1.937634861252183e-05, "loss": 0.492, "step": 649 }, { "epoch": 0.42, "learning_rate": 1.9373921629612876e-05, "loss": 0.5208, "step": 650 }, { "epoch": 0.42, "learning_rate": 1.937149008607978e-05, "loss": 0.5311, "step": 651 }, { "epoch": 0.42, "learning_rate": 1.9369053983105533e-05, "loss": 0.5253, "step": 652 }, { "epoch": 0.42, "learning_rate": 1.9366613321875357e-05, "loss": 0.5196, "step": 653 }, { "epoch": 0.42, "learning_rate": 1.9364168103576696e-05, "loss": 0.506, "step": 654 }, { "epoch": 0.42, "learning_rate": 1.9361718329399194e-05, "loss": 0.4883, "step": 655 }, { "epoch": 0.42, "learning_rate": 1.9359264000534726e-05, "loss": 0.5371, "step": 656 }, { "epoch": 0.42, "learning_rate": 1.9356805118177377e-05, "loss": 0.5559, "step": 657 }, { "epoch": 0.42, "learning_rate": 1.935434168352345e-05, "loss": 0.5216, "step": 658 }, { "epoch": 0.43, "learning_rate": 1.935187369777146e-05, "loss": 0.5213, "step": 659 }, { "epoch": 0.43, "learning_rate": 1.934940116212214e-05, "loss": 0.548, "step": 660 }, { "epoch": 0.43, "learning_rate": 1.9346924077778433e-05, "loss": 0.5042, "step": 661 }, { "epoch": 0.43, "learning_rate": 1.93444424459455e-05, "loss": 0.5497, "step": 662 }, { "epoch": 0.43, "learning_rate": 1.9341956267830698e-05, "loss": 0.5188, "step": 663 }, { "epoch": 0.43, "learning_rate": 1.9339465544643623e-05, "loss": 0.5258, "step": 664 }, { "epoch": 0.43, "learning_rate": 1.9336970277596064e-05, "loss": 0.5246, "step": 665 }, { "epoch": 0.43, "learning_rate": 1.9334470467902024e-05, "loss": 0.5211, "step": 666 }, { "epoch": 0.43, "learning_rate": 1.9331966116777713e-05, "loss": 0.5291, "step": 667 }, { "epoch": 0.43, "learning_rate": 1.9329457225441554e-05, "loss": 0.5272, "step": 668 }, { "epoch": 0.43, "learning_rate": 1.932694379511419e-05, "loss": 0.4845, "step": 669 }, { "epoch": 0.43, "learning_rate": 1.9324425827018452e-05, "loss": 0.5061, "step": 670 }, { "epoch": 0.43, "learning_rate": 1.9321903322379388e-05, "loss": 0.5369, "step": 671 }, { "epoch": 0.43, "learning_rate": 1.9319376282424255e-05, "loss": 0.5143, "step": 672 }, { "epoch": 0.43, "learning_rate": 1.931684470838252e-05, "loss": 0.5188, "step": 673 }, { "epoch": 0.44, "learning_rate": 1.9314308601485842e-05, "loss": 0.5346, "step": 674 }, { "epoch": 0.44, "learning_rate": 1.9311767962968102e-05, "loss": 0.5272, "step": 675 }, { "epoch": 0.44, "learning_rate": 1.9309222794065373e-05, "loss": 0.5136, "step": 676 }, { "epoch": 0.44, "learning_rate": 1.9306673096015937e-05, "loss": 0.5096, "step": 677 }, { "epoch": 0.44, "learning_rate": 1.9304118870060283e-05, "loss": 0.5038, "step": 678 }, { "epoch": 0.44, "learning_rate": 1.9301560117441097e-05, "loss": 0.4892, "step": 679 }, { "epoch": 0.44, "learning_rate": 1.929899683940327e-05, "loss": 0.5298, "step": 680 }, { "epoch": 0.44, "learning_rate": 1.9296429037193894e-05, "loss": 0.5057, "step": 681 }, { "epoch": 0.44, "learning_rate": 1.9293856712062267e-05, "loss": 0.4937, "step": 682 }, { "epoch": 0.44, "learning_rate": 1.929127986525988e-05, "loss": 0.5471, "step": 683 }, { "epoch": 0.44, "learning_rate": 1.9288698498040423e-05, "loss": 0.5154, "step": 684 }, { "epoch": 0.44, "learning_rate": 1.9286112611659795e-05, "loss": 0.5058, "step": 685 }, { "epoch": 0.44, "learning_rate": 1.9283522207376088e-05, "loss": 0.5241, "step": 686 }, { "epoch": 0.44, "learning_rate": 1.9280927286449586e-05, "loss": 0.5174, "step": 687 }, { "epoch": 0.44, "learning_rate": 1.9278327850142783e-05, "loss": 0.539, "step": 688 }, { "epoch": 0.44, "learning_rate": 1.927572389972036e-05, "loss": 0.5153, "step": 689 }, { "epoch": 0.45, "learning_rate": 1.9273115436449198e-05, "loss": 0.5586, "step": 690 }, { "epoch": 0.45, "learning_rate": 1.9270502461598377e-05, "loss": 0.5349, "step": 691 }, { "epoch": 0.45, "learning_rate": 1.9267884976439163e-05, "loss": 0.522, "step": 692 }, { "epoch": 0.45, "learning_rate": 1.9265262982245023e-05, "loss": 0.5196, "step": 693 }, { "epoch": 0.45, "learning_rate": 1.9262636480291618e-05, "loss": 0.5127, "step": 694 }, { "epoch": 0.45, "learning_rate": 1.92600054718568e-05, "loss": 0.5202, "step": 695 }, { "epoch": 0.45, "learning_rate": 1.9257369958220612e-05, "loss": 0.5104, "step": 696 }, { "epoch": 0.45, "learning_rate": 1.925472994066529e-05, "loss": 0.52, "step": 697 }, { "epoch": 0.45, "learning_rate": 1.9252085420475263e-05, "loss": 0.5268, "step": 698 }, { "epoch": 0.45, "learning_rate": 1.9249436398937154e-05, "loss": 0.5193, "step": 699 }, { "epoch": 0.45, "learning_rate": 1.9246782877339767e-05, "loss": 0.5164, "step": 700 }, { "epoch": 0.45, "learning_rate": 1.9244124856974102e-05, "loss": 0.5281, "step": 701 }, { "epoch": 0.45, "learning_rate": 1.9241462339133342e-05, "loss": 0.5348, "step": 702 }, { "epoch": 0.45, "learning_rate": 1.9238795325112867e-05, "loss": 0.5222, "step": 703 }, { "epoch": 0.45, "learning_rate": 1.923612381621024e-05, "loss": 0.5289, "step": 704 }, { "epoch": 0.46, "learning_rate": 1.9233447813725206e-05, "loss": 0.5146, "step": 705 }, { "epoch": 0.46, "learning_rate": 1.92307673189597e-05, "loss": 0.5453, "step": 706 }, { "epoch": 0.46, "learning_rate": 1.922808233321785e-05, "loss": 0.5191, "step": 707 }, { "epoch": 0.46, "learning_rate": 1.9225392857805955e-05, "loss": 0.5315, "step": 708 }, { "epoch": 0.46, "learning_rate": 1.9222698894032508e-05, "loss": 0.5296, "step": 709 }, { "epoch": 0.46, "learning_rate": 1.9220000443208183e-05, "loss": 0.4835, "step": 710 }, { "epoch": 0.46, "learning_rate": 1.9217297506645837e-05, "loss": 0.5348, "step": 711 }, { "epoch": 0.46, "learning_rate": 1.921459008566051e-05, "loss": 0.5128, "step": 712 }, { "epoch": 0.46, "learning_rate": 1.921187818156942e-05, "loss": 0.5104, "step": 713 }, { "epoch": 0.46, "learning_rate": 1.9209161795691975e-05, "loss": 0.5354, "step": 714 }, { "epoch": 0.46, "learning_rate": 1.9206440929349756e-05, "loss": 0.5066, "step": 715 }, { "epoch": 0.46, "learning_rate": 1.9203715583866527e-05, "loss": 0.4869, "step": 716 }, { "epoch": 0.46, "learning_rate": 1.9200985760568227e-05, "loss": 0.4945, "step": 717 }, { "epoch": 0.46, "learning_rate": 1.9198251460782974e-05, "loss": 0.5308, "step": 718 }, { "epoch": 0.46, "learning_rate": 1.9195512685841072e-05, "loss": 0.5369, "step": 719 }, { "epoch": 0.46, "learning_rate": 1.9192769437075e-05, "loss": 0.5183, "step": 720 }, { "epoch": 0.47, "learning_rate": 1.91900217158194e-05, "loss": 0.5246, "step": 721 }, { "epoch": 0.47, "learning_rate": 1.9187269523411108e-05, "loss": 0.5276, "step": 722 }, { "epoch": 0.47, "learning_rate": 1.9184512861189123e-05, "loss": 0.5251, "step": 723 }, { "epoch": 0.47, "learning_rate": 1.918175173049463e-05, "loss": 0.5464, "step": 724 }, { "epoch": 0.47, "learning_rate": 1.9178986132670973e-05, "loss": 0.5311, "step": 725 }, { "epoch": 0.47, "learning_rate": 1.9176216069063683e-05, "loss": 0.5158, "step": 726 }, { "epoch": 0.47, "learning_rate": 1.9173441541020456e-05, "loss": 0.5181, "step": 727 }, { "epoch": 0.47, "learning_rate": 1.9170662549891162e-05, "loss": 0.5258, "step": 728 }, { "epoch": 0.47, "learning_rate": 1.916787909702785e-05, "loss": 0.5152, "step": 729 }, { "epoch": 0.47, "learning_rate": 1.9165091183784722e-05, "loss": 0.5258, "step": 730 }, { "epoch": 0.47, "learning_rate": 1.9162298811518166e-05, "loss": 0.5436, "step": 731 }, { "epoch": 0.47, "learning_rate": 1.9159501981586738e-05, "loss": 0.521, "step": 732 }, { "epoch": 0.47, "learning_rate": 1.9156700695351148e-05, "loss": 0.514, "step": 733 }, { "epoch": 0.47, "learning_rate": 1.9153894954174294e-05, "loss": 0.5004, "step": 734 }, { "epoch": 0.47, "learning_rate": 1.915108475942123e-05, "loss": 0.5062, "step": 735 }, { "epoch": 0.48, "learning_rate": 1.9148270112459178e-05, "loss": 0.5086, "step": 736 }, { "epoch": 0.48, "learning_rate": 1.9145451014657527e-05, "loss": 0.5314, "step": 737 }, { "epoch": 0.48, "learning_rate": 1.9142627467387833e-05, "loss": 0.5114, "step": 738 }, { "epoch": 0.48, "learning_rate": 1.9139799472023814e-05, "loss": 0.5255, "step": 739 }, { "epoch": 0.48, "learning_rate": 1.9136967029941354e-05, "loss": 0.5159, "step": 740 }, { "epoch": 0.48, "learning_rate": 1.91341301425185e-05, "loss": 0.5295, "step": 741 }, { "epoch": 0.48, "learning_rate": 1.9131288811135465e-05, "loss": 0.5263, "step": 742 }, { "epoch": 0.48, "learning_rate": 1.9128443037174615e-05, "loss": 0.5009, "step": 743 }, { "epoch": 0.48, "learning_rate": 1.9125592822020485e-05, "loss": 0.52, "step": 744 }, { "epoch": 0.48, "learning_rate": 1.9122738167059773e-05, "loss": 0.5326, "step": 745 }, { "epoch": 0.48, "learning_rate": 1.9119879073681328e-05, "loss": 0.5362, "step": 746 }, { "epoch": 0.48, "learning_rate": 1.9117015543276163e-05, "loss": 0.5202, "step": 747 }, { "epoch": 0.48, "learning_rate": 1.9114147577237452e-05, "loss": 0.5316, "step": 748 }, { "epoch": 0.48, "learning_rate": 1.9111275176960527e-05, "loss": 0.5556, "step": 749 }, { "epoch": 0.48, "learning_rate": 1.9108398343842873e-05, "loss": 0.4871, "step": 750 }, { "epoch": 0.48, "learning_rate": 1.9105517079284136e-05, "loss": 0.5224, "step": 751 }, { "epoch": 0.49, "learning_rate": 1.9102631384686116e-05, "loss": 0.5302, "step": 752 }, { "epoch": 0.49, "learning_rate": 1.9099741261452764e-05, "loss": 0.5059, "step": 753 }, { "epoch": 0.49, "learning_rate": 1.9096846710990192e-05, "loss": 0.5189, "step": 754 }, { "epoch": 0.49, "learning_rate": 1.909394773470667e-05, "loss": 0.5161, "step": 755 }, { "epoch": 0.49, "learning_rate": 1.909104433401261e-05, "loss": 0.4971, "step": 756 }, { "epoch": 0.49, "learning_rate": 1.908813651032058e-05, "loss": 0.5383, "step": 757 }, { "epoch": 0.49, "learning_rate": 1.908522426504531e-05, "loss": 0.5312, "step": 758 }, { "epoch": 0.49, "learning_rate": 1.9082307599603664e-05, "loss": 0.5169, "step": 759 }, { "epoch": 0.49, "learning_rate": 1.9079386515414667e-05, "loss": 0.5395, "step": 760 }, { "epoch": 0.49, "learning_rate": 1.9076461013899495e-05, "loss": 0.511, "step": 761 }, { "epoch": 0.49, "learning_rate": 1.9073531096481475e-05, "loss": 0.5164, "step": 762 }, { "epoch": 0.49, "learning_rate": 1.9070596764586066e-05, "loss": 0.5313, "step": 763 }, { "epoch": 0.49, "learning_rate": 1.9067658019640897e-05, "loss": 0.5028, "step": 764 }, { "epoch": 0.49, "learning_rate": 1.906471486307573e-05, "loss": 0.4968, "step": 765 }, { "epoch": 0.49, "learning_rate": 1.9061767296322477e-05, "loss": 0.5243, "step": 766 }, { "epoch": 0.5, "learning_rate": 1.9058815320815188e-05, "loss": 0.5294, "step": 767 }, { "epoch": 0.5, "learning_rate": 1.9055858937990083e-05, "loss": 0.531, "step": 768 }, { "epoch": 0.5, "learning_rate": 1.9052898149285494e-05, "loss": 0.5058, "step": 769 }, { "epoch": 0.5, "learning_rate": 1.9049932956141917e-05, "loss": 0.5519, "step": 770 }, { "epoch": 0.5, "learning_rate": 1.9046963360001984e-05, "loss": 0.519, "step": 771 }, { "epoch": 0.5, "learning_rate": 1.9043989362310472e-05, "loss": 0.5345, "step": 772 }, { "epoch": 0.5, "learning_rate": 1.9041010964514296e-05, "loss": 0.5451, "step": 773 }, { "epoch": 0.5, "learning_rate": 1.9038028168062517e-05, "loss": 0.5131, "step": 774 }, { "epoch": 0.5, "learning_rate": 1.903504097440633e-05, "loss": 0.5199, "step": 775 }, { "epoch": 0.5, "learning_rate": 1.903204938499907e-05, "loss": 0.5356, "step": 776 }, { "epoch": 0.5, "learning_rate": 1.9029053401296216e-05, "loss": 0.5213, "step": 777 }, { "epoch": 0.5, "learning_rate": 1.9026053024755384e-05, "loss": 0.5153, "step": 778 }, { "epoch": 0.5, "learning_rate": 1.902304825683632e-05, "loss": 0.5268, "step": 779 }, { "epoch": 0.5, "learning_rate": 1.902003909900091e-05, "loss": 0.549, "step": 780 }, { "epoch": 0.5, "learning_rate": 1.9017025552713182e-05, "loss": 0.5052, "step": 781 }, { "epoch": 0.5, "learning_rate": 1.901400761943929e-05, "loss": 0.5019, "step": 782 }, { "epoch": 0.51, "learning_rate": 1.9010985300647528e-05, "loss": 0.4966, "step": 783 }, { "epoch": 0.51, "learning_rate": 1.9007958597808326e-05, "loss": 0.5468, "step": 784 }, { "epoch": 0.51, "learning_rate": 1.900492751239424e-05, "loss": 0.5281, "step": 785 }, { "epoch": 0.51, "learning_rate": 1.9001892045879963e-05, "loss": 0.5306, "step": 786 }, { "epoch": 0.51, "learning_rate": 1.8998852199742313e-05, "loss": 0.5249, "step": 787 }, { "epoch": 0.51, "learning_rate": 1.8995807975460246e-05, "loss": 0.5531, "step": 788 }, { "epoch": 0.51, "learning_rate": 1.899275937451485e-05, "loss": 0.5122, "step": 789 }, { "epoch": 0.51, "learning_rate": 1.8989706398389335e-05, "loss": 0.5186, "step": 790 }, { "epoch": 0.51, "learning_rate": 1.8986649048569038e-05, "loss": 0.5046, "step": 791 }, { "epoch": 0.51, "learning_rate": 1.8983587326541437e-05, "loss": 0.5123, "step": 792 }, { "epoch": 0.51, "learning_rate": 1.8980521233796123e-05, "loss": 0.4946, "step": 793 }, { "epoch": 0.51, "learning_rate": 1.897745077182482e-05, "loss": 0.5415, "step": 794 }, { "epoch": 0.51, "learning_rate": 1.897437594212138e-05, "loss": 0.5111, "step": 795 }, { "epoch": 0.51, "learning_rate": 1.8971296746181774e-05, "loss": 0.5257, "step": 796 }, { "epoch": 0.51, "learning_rate": 1.8968213185504103e-05, "loss": 0.4849, "step": 797 }, { "epoch": 0.52, "learning_rate": 1.8965125261588586e-05, "loss": 0.5204, "step": 798 }, { "epoch": 0.52, "learning_rate": 1.896203297593757e-05, "loss": 0.5327, "step": 799 }, { "epoch": 0.52, "learning_rate": 1.8958936330055516e-05, "loss": 0.5252, "step": 800 }, { "epoch": 0.52, "learning_rate": 1.895583532544902e-05, "loss": 0.4923, "step": 801 }, { "epoch": 0.52, "learning_rate": 1.8952729963626783e-05, "loss": 0.5354, "step": 802 }, { "epoch": 0.52, "learning_rate": 1.894962024609964e-05, "loss": 0.5267, "step": 803 }, { "epoch": 0.52, "learning_rate": 1.894650617438054e-05, "loss": 0.5327, "step": 804 }, { "epoch": 0.52, "learning_rate": 1.8943387749984537e-05, "loss": 0.5399, "step": 805 }, { "epoch": 0.52, "learning_rate": 1.8940264974428827e-05, "loss": 0.5272, "step": 806 }, { "epoch": 0.52, "learning_rate": 1.8937137849232705e-05, "loss": 0.5367, "step": 807 }, { "epoch": 0.52, "learning_rate": 1.893400637591759e-05, "loss": 0.5386, "step": 808 }, { "epoch": 0.52, "learning_rate": 1.8930870556007016e-05, "loss": 0.4968, "step": 809 }, { "epoch": 0.52, "learning_rate": 1.8927730391026625e-05, "loss": 0.5157, "step": 810 }, { "epoch": 0.52, "learning_rate": 1.8924585882504184e-05, "loss": 0.5165, "step": 811 }, { "epoch": 0.52, "learning_rate": 1.8921437031969557e-05, "loss": 0.5248, "step": 812 }, { "epoch": 0.52, "learning_rate": 1.8918283840954745e-05, "loss": 0.4947, "step": 813 }, { "epoch": 0.53, "learning_rate": 1.8915126310993838e-05, "loss": 0.5429, "step": 814 }, { "epoch": 0.53, "learning_rate": 1.8911964443623046e-05, "loss": 0.5162, "step": 815 }, { "epoch": 0.53, "learning_rate": 1.8908798240380692e-05, "loss": 0.5193, "step": 816 }, { "epoch": 0.53, "learning_rate": 1.89056277028072e-05, "loss": 0.5131, "step": 817 }, { "epoch": 0.53, "learning_rate": 1.890245283244511e-05, "loss": 0.5213, "step": 818 }, { "epoch": 0.53, "learning_rate": 1.8899273630839073e-05, "loss": 0.5522, "step": 819 }, { "epoch": 0.53, "learning_rate": 1.8896090099535834e-05, "loss": 0.5026, "step": 820 }, { "epoch": 0.53, "learning_rate": 1.8892902240084257e-05, "loss": 0.5243, "step": 821 }, { "epoch": 0.53, "learning_rate": 1.888971005403531e-05, "loss": 0.5508, "step": 822 }, { "epoch": 0.53, "learning_rate": 1.888651354294206e-05, "loss": 0.5223, "step": 823 }, { "epoch": 0.53, "learning_rate": 1.888331270835968e-05, "loss": 0.539, "step": 824 }, { "epoch": 0.53, "learning_rate": 1.8880107551845453e-05, "loss": 0.5118, "step": 825 }, { "epoch": 0.53, "learning_rate": 1.8876898074958757e-05, "loss": 0.5131, "step": 826 }, { "epoch": 0.53, "learning_rate": 1.8873684279261076e-05, "loss": 0.5211, "step": 827 }, { "epoch": 0.53, "learning_rate": 1.8870466166315992e-05, "loss": 0.5404, "step": 828 }, { "epoch": 0.54, "learning_rate": 1.886724373768919e-05, "loss": 0.5187, "step": 829 }, { "epoch": 0.54, "learning_rate": 1.8864016994948456e-05, "loss": 0.5178, "step": 830 }, { "epoch": 0.54, "learning_rate": 1.8860785939663674e-05, "loss": 0.4933, "step": 831 }, { "epoch": 0.54, "learning_rate": 1.885755057340682e-05, "loss": 0.5214, "step": 832 }, { "epoch": 0.54, "learning_rate": 1.8854310897751976e-05, "loss": 0.4929, "step": 833 }, { "epoch": 0.54, "learning_rate": 1.885106691427532e-05, "loss": 0.5325, "step": 834 }, { "epoch": 0.54, "learning_rate": 1.8847818624555117e-05, "loss": 0.4969, "step": 835 }, { "epoch": 0.54, "learning_rate": 1.8844566030171737e-05, "loss": 0.5249, "step": 836 }, { "epoch": 0.54, "learning_rate": 1.8841309132707644e-05, "loss": 0.5082, "step": 837 }, { "epoch": 0.54, "learning_rate": 1.8838047933747386e-05, "loss": 0.5319, "step": 838 }, { "epoch": 0.54, "learning_rate": 1.883478243487761e-05, "loss": 0.5262, "step": 839 }, { "epoch": 0.54, "learning_rate": 1.8831512637687054e-05, "loss": 0.5309, "step": 840 }, { "epoch": 0.54, "learning_rate": 1.8828238543766554e-05, "loss": 0.5136, "step": 841 }, { "epoch": 0.54, "learning_rate": 1.8824960154709027e-05, "loss": 0.5523, "step": 842 }, { "epoch": 0.54, "learning_rate": 1.8821677472109483e-05, "loss": 0.5047, "step": 843 }, { "epoch": 0.54, "learning_rate": 1.881839049756502e-05, "loss": 0.5187, "step": 844 }, { "epoch": 0.55, "learning_rate": 1.8815099232674827e-05, "loss": 0.5503, "step": 845 }, { "epoch": 0.55, "learning_rate": 1.8811803679040178e-05, "loss": 0.5142, "step": 846 }, { "epoch": 0.55, "learning_rate": 1.8808503838264435e-05, "loss": 0.5394, "step": 847 }, { "epoch": 0.55, "learning_rate": 1.880519971195304e-05, "loss": 0.5194, "step": 848 }, { "epoch": 0.55, "learning_rate": 1.8801891301713535e-05, "loss": 0.505, "step": 849 }, { "epoch": 0.55, "learning_rate": 1.8798578609155528e-05, "loss": 0.55, "step": 850 }, { "epoch": 0.55, "learning_rate": 1.8795261635890716e-05, "loss": 0.5265, "step": 851 }, { "epoch": 0.55, "learning_rate": 1.879194038353289e-05, "loss": 0.525, "step": 852 }, { "epoch": 0.55, "learning_rate": 1.878861485369791e-05, "loss": 0.5396, "step": 853 }, { "epoch": 0.55, "learning_rate": 1.8785285048003722e-05, "loss": 0.4973, "step": 854 }, { "epoch": 0.55, "learning_rate": 1.8781950968070348e-05, "loss": 0.5222, "step": 855 }, { "epoch": 0.55, "learning_rate": 1.87786126155199e-05, "loss": 0.5353, "step": 856 }, { "epoch": 0.55, "learning_rate": 1.8775269991976552e-05, "loss": 0.5061, "step": 857 }, { "epoch": 0.55, "learning_rate": 1.8771923099066573e-05, "loss": 0.5195, "step": 858 }, { "epoch": 0.55, "learning_rate": 1.87685719384183e-05, "loss": 0.519, "step": 859 }, { "epoch": 0.56, "learning_rate": 1.8765216511662153e-05, "loss": 0.5245, "step": 860 }, { "epoch": 0.56, "learning_rate": 1.8761856820430614e-05, "loss": 0.4968, "step": 861 }, { "epoch": 0.56, "learning_rate": 1.875849286635825e-05, "loss": 0.533, "step": 862 }, { "epoch": 0.56, "learning_rate": 1.875512465108171e-05, "loss": 0.5315, "step": 863 }, { "epoch": 0.56, "learning_rate": 1.8751752176239693e-05, "loss": 0.5213, "step": 864 }, { "epoch": 0.56, "learning_rate": 1.8748375443472993e-05, "loss": 0.5323, "step": 865 }, { "epoch": 0.56, "learning_rate": 1.8744994454424463e-05, "loss": 0.5143, "step": 866 }, { "epoch": 0.56, "learning_rate": 1.8741609210739034e-05, "loss": 0.5116, "step": 867 }, { "epoch": 0.56, "learning_rate": 1.87382197140637e-05, "loss": 0.5158, "step": 868 }, { "epoch": 0.56, "learning_rate": 1.8734825966047528e-05, "loss": 0.5482, "step": 869 }, { "epoch": 0.56, "learning_rate": 1.8731427968341654e-05, "loss": 0.5203, "step": 870 }, { "epoch": 0.56, "learning_rate": 1.872802572259928e-05, "loss": 0.5153, "step": 871 }, { "epoch": 0.56, "learning_rate": 1.8724619230475675e-05, "loss": 0.5275, "step": 872 }, { "epoch": 0.56, "learning_rate": 1.8721208493628174e-05, "loss": 0.5034, "step": 873 }, { "epoch": 0.56, "learning_rate": 1.871779351371618e-05, "loss": 0.5227, "step": 874 }, { "epoch": 0.56, "learning_rate": 1.871437429240115e-05, "loss": 0.5195, "step": 875 }, { "epoch": 0.57, "learning_rate": 1.8710950831346623e-05, "loss": 0.5445, "step": 876 }, { "epoch": 0.57, "learning_rate": 1.870752313221818e-05, "loss": 0.5076, "step": 877 }, { "epoch": 0.57, "learning_rate": 1.870409119668348e-05, "loss": 0.53, "step": 878 }, { "epoch": 0.57, "learning_rate": 1.8700655026412238e-05, "loss": 0.5135, "step": 879 }, { "epoch": 0.57, "learning_rate": 1.8697214623076222e-05, "loss": 0.5071, "step": 880 }, { "epoch": 0.57, "learning_rate": 1.8693769988349274e-05, "loss": 0.5328, "step": 881 }, { "epoch": 0.57, "learning_rate": 1.8690321123907277e-05, "loss": 0.4843, "step": 882 }, { "epoch": 0.57, "learning_rate": 1.868686803142819e-05, "loss": 0.4909, "step": 883 }, { "epoch": 0.57, "learning_rate": 1.8683410712592015e-05, "loss": 0.5035, "step": 884 }, { "epoch": 0.57, "learning_rate": 1.8679949169080815e-05, "loss": 0.5364, "step": 885 }, { "epoch": 0.57, "learning_rate": 1.8676483402578714e-05, "loss": 0.4987, "step": 886 }, { "epoch": 0.57, "learning_rate": 1.867301341477188e-05, "loss": 0.5152, "step": 887 }, { "epoch": 0.57, "learning_rate": 1.8669539207348544e-05, "loss": 0.5486, "step": 888 }, { "epoch": 0.57, "learning_rate": 1.8666060781998982e-05, "loss": 0.5312, "step": 889 }, { "epoch": 0.57, "learning_rate": 1.8662578140415535e-05, "loss": 0.5142, "step": 890 }, { "epoch": 0.58, "learning_rate": 1.865909128429258e-05, "loss": 0.5089, "step": 891 }, { "epoch": 0.58, "learning_rate": 1.8655600215326547e-05, "loss": 0.5301, "step": 892 }, { "epoch": 0.58, "learning_rate": 1.865210493521593e-05, "loss": 0.5009, "step": 893 }, { "epoch": 0.58, "learning_rate": 1.8648605445661256e-05, "loss": 0.5298, "step": 894 }, { "epoch": 0.58, "learning_rate": 1.86451017483651e-05, "loss": 0.5371, "step": 895 }, { "epoch": 0.58, "learning_rate": 1.8641593845032098e-05, "loss": 0.5245, "step": 896 }, { "epoch": 0.58, "learning_rate": 1.8638081737368928e-05, "loss": 0.5663, "step": 897 }, { "epoch": 0.58, "learning_rate": 1.8634565427084295e-05, "loss": 0.5337, "step": 898 }, { "epoch": 0.58, "learning_rate": 1.8631044915888976e-05, "loss": 0.5119, "step": 899 }, { "epoch": 0.58, "learning_rate": 1.8627520205495772e-05, "loss": 0.5105, "step": 900 }, { "epoch": 0.58, "learning_rate": 1.8623991297619536e-05, "loss": 0.5125, "step": 901 }, { "epoch": 0.58, "learning_rate": 1.8620458193977166e-05, "loss": 0.529, "step": 902 }, { "epoch": 0.58, "learning_rate": 1.861692089628759e-05, "loss": 0.526, "step": 903 }, { "epoch": 0.58, "learning_rate": 1.8613379406271784e-05, "loss": 0.5226, "step": 904 }, { "epoch": 0.58, "learning_rate": 1.8609833725652763e-05, "loss": 0.5131, "step": 905 }, { "epoch": 0.58, "learning_rate": 1.8606283856155585e-05, "loss": 0.486, "step": 906 }, { "epoch": 0.59, "learning_rate": 1.8602729799507335e-05, "loss": 0.5129, "step": 907 }, { "epoch": 0.59, "learning_rate": 1.8599171557437147e-05, "loss": 0.5388, "step": 908 }, { "epoch": 0.59, "learning_rate": 1.859560913167618e-05, "loss": 0.5212, "step": 909 }, { "epoch": 0.59, "learning_rate": 1.859204252395764e-05, "loss": 0.5015, "step": 910 }, { "epoch": 0.59, "learning_rate": 1.858847173601676e-05, "loss": 0.4776, "step": 911 }, { "epoch": 0.59, "learning_rate": 1.858489676959081e-05, "loss": 0.5232, "step": 912 }, { "epoch": 0.59, "learning_rate": 1.858131762641909e-05, "loss": 0.5153, "step": 913 }, { "epoch": 0.59, "learning_rate": 1.8577734308242936e-05, "loss": 0.5099, "step": 914 }, { "epoch": 0.59, "learning_rate": 1.857414681680571e-05, "loss": 0.5218, "step": 915 }, { "epoch": 0.59, "learning_rate": 1.8570555153852806e-05, "loss": 0.515, "step": 916 }, { "epoch": 0.59, "learning_rate": 1.8566959321131656e-05, "loss": 0.506, "step": 917 }, { "epoch": 0.59, "learning_rate": 1.856335932039171e-05, "loss": 0.5418, "step": 918 }, { "epoch": 0.59, "learning_rate": 1.8559755153384448e-05, "loss": 0.495, "step": 919 }, { "epoch": 0.59, "learning_rate": 1.855614682186338e-05, "loss": 0.4858, "step": 920 }, { "epoch": 0.59, "learning_rate": 1.8552534327584038e-05, "loss": 0.541, "step": 921 }, { "epoch": 0.6, "learning_rate": 1.8548917672303987e-05, "loss": 0.5309, "step": 922 }, { "epoch": 0.6, "learning_rate": 1.8545296857782808e-05, "loss": 0.5, "step": 923 }, { "epoch": 0.6, "learning_rate": 1.8541671885782106e-05, "loss": 0.5141, "step": 924 }, { "epoch": 0.6, "learning_rate": 1.853804275806552e-05, "loss": 0.5458, "step": 925 }, { "epoch": 0.6, "learning_rate": 1.8534409476398693e-05, "loss": 0.5165, "step": 926 }, { "epoch": 0.6, "learning_rate": 1.8530772042549303e-05, "loss": 0.5256, "step": 927 }, { "epoch": 0.6, "learning_rate": 1.8527130458287047e-05, "loss": 0.5246, "step": 928 }, { "epoch": 0.6, "learning_rate": 1.852348472538363e-05, "loss": 0.5105, "step": 929 }, { "epoch": 0.6, "learning_rate": 1.851983484561279e-05, "loss": 0.5209, "step": 930 }, { "epoch": 0.6, "learning_rate": 1.851618082075027e-05, "loss": 0.5114, "step": 931 }, { "epoch": 0.6, "learning_rate": 1.851252265257384e-05, "loss": 0.5475, "step": 932 }, { "epoch": 0.6, "learning_rate": 1.8508860342863283e-05, "loss": 0.5389, "step": 933 }, { "epoch": 0.6, "learning_rate": 1.850519389340039e-05, "loss": 0.5055, "step": 934 }, { "epoch": 0.6, "learning_rate": 1.850152330596897e-05, "loss": 0.5156, "step": 935 }, { "epoch": 0.6, "learning_rate": 1.8497848582354852e-05, "loss": 0.5216, "step": 936 }, { "epoch": 0.6, "learning_rate": 1.8494169724345868e-05, "loss": 0.5077, "step": 937 }, { "epoch": 0.61, "learning_rate": 1.849048673373187e-05, "loss": 0.5299, "step": 938 }, { "epoch": 0.61, "learning_rate": 1.848679961230471e-05, "loss": 0.5328, "step": 939 }, { "epoch": 0.61, "learning_rate": 1.8483108361858263e-05, "loss": 0.5339, "step": 940 }, { "epoch": 0.61, "learning_rate": 1.84794129841884e-05, "loss": 0.529, "step": 941 }, { "epoch": 0.61, "learning_rate": 1.8475713481093005e-05, "loss": 0.5005, "step": 942 }, { "epoch": 0.61, "learning_rate": 1.8472009854371973e-05, "loss": 0.4962, "step": 943 }, { "epoch": 0.61, "learning_rate": 1.8468302105827195e-05, "loss": 0.5326, "step": 944 }, { "epoch": 0.61, "learning_rate": 1.8464590237262584e-05, "loss": 0.5108, "step": 945 }, { "epoch": 0.61, "learning_rate": 1.8460874250484045e-05, "loss": 0.5204, "step": 946 }, { "epoch": 0.61, "learning_rate": 1.8457154147299484e-05, "loss": 0.5247, "step": 947 }, { "epoch": 0.61, "learning_rate": 1.845342992951882e-05, "loss": 0.5007, "step": 948 }, { "epoch": 0.61, "learning_rate": 1.8449701598953965e-05, "loss": 0.5375, "step": 949 }, { "epoch": 0.61, "learning_rate": 1.8445969157418845e-05, "loss": 0.4983, "step": 950 }, { "epoch": 0.61, "learning_rate": 1.8442232606729363e-05, "loss": 0.5422, "step": 951 }, { "epoch": 0.61, "learning_rate": 1.8438491948703445e-05, "loss": 0.4993, "step": 952 }, { "epoch": 0.62, "learning_rate": 1.8434747185161003e-05, "loss": 0.5387, "step": 953 }, { "epoch": 0.62, "learning_rate": 1.843099831792395e-05, "loss": 0.5283, "step": 954 }, { "epoch": 0.62, "learning_rate": 1.842724534881619e-05, "loss": 0.5332, "step": 955 }, { "epoch": 0.62, "learning_rate": 1.842348827966363e-05, "loss": 0.5272, "step": 956 }, { "epoch": 0.62, "learning_rate": 1.841972711229417e-05, "loss": 0.5037, "step": 957 }, { "epoch": 0.62, "learning_rate": 1.8415961848537702e-05, "loss": 0.5511, "step": 958 }, { "epoch": 0.62, "learning_rate": 1.8412192490226113e-05, "loss": 0.5265, "step": 959 }, { "epoch": 0.62, "learning_rate": 1.840841903919328e-05, "loss": 0.4896, "step": 960 }, { "epoch": 0.62, "learning_rate": 1.8404641497275065e-05, "loss": 0.5332, "step": 961 }, { "epoch": 0.62, "learning_rate": 1.8400859866309337e-05, "loss": 0.5106, "step": 962 }, { "epoch": 0.62, "learning_rate": 1.839707414813594e-05, "loss": 0.5406, "step": 963 }, { "epoch": 0.62, "learning_rate": 1.8393284344596715e-05, "loss": 0.5167, "step": 964 }, { "epoch": 0.62, "learning_rate": 1.8389490457535482e-05, "loss": 0.5003, "step": 965 }, { "epoch": 0.62, "learning_rate": 1.8385692488798056e-05, "loss": 0.5014, "step": 966 }, { "epoch": 0.62, "learning_rate": 1.838189044023223e-05, "loss": 0.5343, "step": 967 }, { "epoch": 0.62, "learning_rate": 1.837808431368779e-05, "loss": 0.5372, "step": 968 }, { "epoch": 0.63, "learning_rate": 1.8374274111016502e-05, "loss": 0.5435, "step": 969 }, { "epoch": 0.63, "learning_rate": 1.8370459834072118e-05, "loss": 0.5276, "step": 970 }, { "epoch": 0.63, "learning_rate": 1.8366641484710362e-05, "loss": 0.5148, "step": 971 }, { "epoch": 0.63, "learning_rate": 1.8362819064788956e-05, "loss": 0.534, "step": 972 }, { "epoch": 0.63, "learning_rate": 1.835899257616759e-05, "loss": 0.5043, "step": 973 }, { "epoch": 0.63, "learning_rate": 1.8355162020707932e-05, "loss": 0.4994, "step": 974 }, { "epoch": 0.63, "learning_rate": 1.835132740027364e-05, "loss": 0.5301, "step": 975 }, { "epoch": 0.63, "learning_rate": 1.8347488716730343e-05, "loss": 0.5348, "step": 976 }, { "epoch": 0.63, "learning_rate": 1.8343645971945646e-05, "loss": 0.5275, "step": 977 }, { "epoch": 0.63, "learning_rate": 1.8339799167789127e-05, "loss": 0.5139, "step": 978 }, { "epoch": 0.63, "learning_rate": 1.8335948306132344e-05, "loss": 0.5405, "step": 979 }, { "epoch": 0.63, "learning_rate": 1.8332093388848836e-05, "loss": 0.4917, "step": 980 }, { "epoch": 0.63, "learning_rate": 1.8328234417814097e-05, "loss": 0.5296, "step": 981 }, { "epoch": 0.63, "learning_rate": 1.8324371394905606e-05, "loss": 0.5105, "step": 982 }, { "epoch": 0.63, "learning_rate": 1.8320504322002817e-05, "loss": 0.4956, "step": 983 }, { "epoch": 0.64, "learning_rate": 1.8316633200987143e-05, "loss": 0.5166, "step": 984 }, { "epoch": 0.64, "learning_rate": 1.8312758033741968e-05, "loss": 0.525, "step": 985 }, { "epoch": 0.64, "learning_rate": 1.8308878822152655e-05, "loss": 0.5255, "step": 986 }, { "epoch": 0.64, "learning_rate": 1.8304995568106524e-05, "loss": 0.5259, "step": 987 }, { "epoch": 0.64, "learning_rate": 1.830110827349287e-05, "loss": 0.4996, "step": 988 }, { "epoch": 0.64, "learning_rate": 1.8297216940202945e-05, "loss": 0.5129, "step": 989 }, { "epoch": 0.64, "learning_rate": 1.829332157012997e-05, "loss": 0.526, "step": 990 }, { "epoch": 0.64, "learning_rate": 1.828942216516913e-05, "loss": 0.4995, "step": 991 }, { "epoch": 0.64, "learning_rate": 1.8285518727217578e-05, "loss": 0.5211, "step": 992 }, { "epoch": 0.64, "learning_rate": 1.828161125817442e-05, "loss": 0.5458, "step": 993 }, { "epoch": 0.64, "learning_rate": 1.8277699759940732e-05, "loss": 0.4904, "step": 994 }, { "epoch": 0.64, "learning_rate": 1.8273784234419544e-05, "loss": 0.5224, "step": 995 }, { "epoch": 0.64, "learning_rate": 1.8269864683515847e-05, "loss": 0.527, "step": 996 }, { "epoch": 0.64, "learning_rate": 1.8265941109136586e-05, "loss": 0.4823, "step": 997 }, { "epoch": 0.64, "learning_rate": 1.8262013513190677e-05, "loss": 0.5046, "step": 998 }, { "epoch": 0.64, "learning_rate": 1.8258081897588978e-05, "loss": 0.536, "step": 999 }, { "epoch": 0.65, "learning_rate": 1.8254146264244316e-05, "loss": 0.5157, "step": 1000 }, { "epoch": 0.65, "learning_rate": 1.8250206615071457e-05, "loss": 0.5282, "step": 1001 }, { "epoch": 0.65, "learning_rate": 1.824626295198713e-05, "loss": 0.4845, "step": 1002 }, { "epoch": 0.65, "learning_rate": 1.824231527691002e-05, "loss": 0.5398, "step": 1003 }, { "epoch": 0.65, "learning_rate": 1.8238363591760758e-05, "loss": 0.494, "step": 1004 }, { "epoch": 0.65, "learning_rate": 1.823440789846193e-05, "loss": 0.5336, "step": 1005 }, { "epoch": 0.65, "learning_rate": 1.8230448198938067e-05, "loss": 0.5056, "step": 1006 }, { "epoch": 0.65, "learning_rate": 1.822648449511565e-05, "loss": 0.5039, "step": 1007 }, { "epoch": 0.65, "learning_rate": 1.822251678892312e-05, "loss": 0.5233, "step": 1008 }, { "epoch": 0.65, "learning_rate": 1.821854508229084e-05, "loss": 0.4799, "step": 1009 }, { "epoch": 0.65, "learning_rate": 1.821456937715115e-05, "loss": 0.5399, "step": 1010 }, { "epoch": 0.65, "learning_rate": 1.8210589675438314e-05, "loss": 0.5069, "step": 1011 }, { "epoch": 0.65, "learning_rate": 1.8206605979088545e-05, "loss": 0.5176, "step": 1012 }, { "epoch": 0.65, "learning_rate": 1.820261829004e-05, "loss": 0.5164, "step": 1013 }, { "epoch": 0.65, "learning_rate": 1.819862661023278e-05, "loss": 0.5183, "step": 1014 }, { "epoch": 0.66, "learning_rate": 1.8194630941608932e-05, "loss": 0.4893, "step": 1015 }, { "epoch": 0.66, "learning_rate": 1.819063128611244e-05, "loss": 0.4829, "step": 1016 }, { "epoch": 0.66, "learning_rate": 1.8186627645689218e-05, "loss": 0.5074, "step": 1017 }, { "epoch": 0.66, "learning_rate": 1.8182620022287133e-05, "loss": 0.5417, "step": 1018 }, { "epoch": 0.66, "learning_rate": 1.8178608417855984e-05, "loss": 0.5348, "step": 1019 }, { "epoch": 0.66, "learning_rate": 1.8174592834347503e-05, "loss": 0.5378, "step": 1020 }, { "epoch": 0.66, "learning_rate": 1.8170573273715368e-05, "loss": 0.5191, "step": 1021 }, { "epoch": 0.66, "learning_rate": 1.816654973791518e-05, "loss": 0.4881, "step": 1022 }, { "epoch": 0.66, "learning_rate": 1.8162522228904485e-05, "loss": 0.5145, "step": 1023 }, { "epoch": 0.66, "learning_rate": 1.815849074864275e-05, "loss": 0.5256, "step": 1024 }, { "epoch": 0.66, "learning_rate": 1.815445529909139e-05, "loss": 0.5284, "step": 1025 }, { "epoch": 0.66, "learning_rate": 1.8150415882213735e-05, "loss": 0.4991, "step": 1026 }, { "epoch": 0.66, "learning_rate": 1.8146372499975058e-05, "loss": 0.513, "step": 1027 }, { "epoch": 0.66, "learning_rate": 1.814232515434255e-05, "loss": 0.5022, "step": 1028 }, { "epoch": 0.66, "learning_rate": 1.8138273847285338e-05, "loss": 0.5231, "step": 1029 }, { "epoch": 0.66, "learning_rate": 1.8134218580774475e-05, "loss": 0.5012, "step": 1030 }, { "epoch": 0.67, "learning_rate": 1.813015935678294e-05, "loss": 0.5237, "step": 1031 }, { "epoch": 0.67, "learning_rate": 1.8126096177285637e-05, "loss": 0.5188, "step": 1032 }, { "epoch": 0.67, "learning_rate": 1.8122029044259392e-05, "loss": 0.5052, "step": 1033 }, { "epoch": 0.67, "learning_rate": 1.811795795968296e-05, "loss": 0.5018, "step": 1034 }, { "epoch": 0.67, "learning_rate": 1.8113882925537008e-05, "loss": 0.5126, "step": 1035 }, { "epoch": 0.67, "learning_rate": 1.8109803943804146e-05, "loss": 0.5115, "step": 1036 }, { "epoch": 0.67, "learning_rate": 1.8105721016468878e-05, "loss": 0.5347, "step": 1037 }, { "epoch": 0.67, "learning_rate": 1.8101634145517644e-05, "loss": 0.5126, "step": 1038 }, { "epoch": 0.67, "learning_rate": 1.80975433329388e-05, "loss": 0.5369, "step": 1039 }, { "epoch": 0.67, "learning_rate": 1.8093448580722617e-05, "loss": 0.5032, "step": 1040 }, { "epoch": 0.67, "learning_rate": 1.808934989086129e-05, "loss": 0.4957, "step": 1041 }, { "epoch": 0.67, "learning_rate": 1.8085247265348913e-05, "loss": 0.5294, "step": 1042 }, { "epoch": 0.67, "learning_rate": 1.8081140706181514e-05, "loss": 0.4911, "step": 1043 }, { "epoch": 0.67, "learning_rate": 1.8077030215357024e-05, "loss": 0.5171, "step": 1044 }, { "epoch": 0.67, "learning_rate": 1.807291579487529e-05, "loss": 0.5337, "step": 1045 }, { "epoch": 0.68, "learning_rate": 1.8068797446738072e-05, "loss": 0.5144, "step": 1046 }, { "epoch": 0.68, "learning_rate": 1.8064675172949035e-05, "loss": 0.5479, "step": 1047 }, { "epoch": 0.68, "learning_rate": 1.806054897551376e-05, "loss": 0.5253, "step": 1048 }, { "epoch": 0.68, "learning_rate": 1.8056418856439737e-05, "loss": 0.5048, "step": 1049 }, { "epoch": 0.68, "learning_rate": 1.805228481773636e-05, "loss": 0.4952, "step": 1050 }, { "epoch": 0.68, "learning_rate": 1.8048146861414933e-05, "loss": 0.4907, "step": 1051 }, { "epoch": 0.68, "learning_rate": 1.8044004989488662e-05, "loss": 0.5216, "step": 1052 }, { "epoch": 0.68, "learning_rate": 1.803985920397267e-05, "loss": 0.5049, "step": 1053 }, { "epoch": 0.68, "learning_rate": 1.8035709506883962e-05, "loss": 0.5135, "step": 1054 }, { "epoch": 0.68, "learning_rate": 1.803155590024147e-05, "loss": 0.5291, "step": 1055 }, { "epoch": 0.68, "learning_rate": 1.802739838606601e-05, "loss": 0.5104, "step": 1056 }, { "epoch": 0.68, "learning_rate": 1.8023236966380312e-05, "loss": 0.5054, "step": 1057 }, { "epoch": 0.68, "learning_rate": 1.8019071643208996e-05, "loss": 0.5083, "step": 1058 }, { "epoch": 0.68, "learning_rate": 1.8014902418578588e-05, "loss": 0.534, "step": 1059 }, { "epoch": 0.68, "learning_rate": 1.801072929451751e-05, "loss": 0.5085, "step": 1060 }, { "epoch": 0.69, "learning_rate": 1.800655227305608e-05, "loss": 0.523, "step": 1061 }, { "epoch": 0.69, "learning_rate": 1.8002371356226512e-05, "loss": 0.519, "step": 1062 }, { "epoch": 0.69, "learning_rate": 1.799818654606292e-05, "loss": 0.5452, "step": 1063 }, { "epoch": 0.69, "learning_rate": 1.7993997844601305e-05, "loss": 0.504, "step": 1064 }, { "epoch": 0.69, "learning_rate": 1.798980525387956e-05, "loss": 0.4886, "step": 1065 }, { "epoch": 0.69, "learning_rate": 1.7985608775937492e-05, "loss": 0.5289, "step": 1066 }, { "epoch": 0.69, "learning_rate": 1.7981408412816762e-05, "loss": 0.5087, "step": 1067 }, { "epoch": 0.69, "learning_rate": 1.7977204166560954e-05, "loss": 0.513, "step": 1068 }, { "epoch": 0.69, "learning_rate": 1.7972996039215523e-05, "loss": 0.4867, "step": 1069 }, { "epoch": 0.69, "learning_rate": 1.796878403282782e-05, "loss": 0.5551, "step": 1070 }, { "epoch": 0.69, "learning_rate": 1.7964568149447078e-05, "loss": 0.5326, "step": 1071 }, { "epoch": 0.69, "learning_rate": 1.7960348391124422e-05, "loss": 0.5351, "step": 1072 }, { "epoch": 0.69, "learning_rate": 1.7956124759912862e-05, "loss": 0.5282, "step": 1073 }, { "epoch": 0.69, "learning_rate": 1.7951897257867284e-05, "loss": 0.5109, "step": 1074 }, { "epoch": 0.69, "learning_rate": 1.7947665887044468e-05, "loss": 0.4869, "step": 1075 }, { "epoch": 0.69, "learning_rate": 1.7943430649503065e-05, "loss": 0.5301, "step": 1076 }, { "epoch": 0.7, "learning_rate": 1.793919154730362e-05, "loss": 0.5078, "step": 1077 }, { "epoch": 0.7, "learning_rate": 1.7934948582508554e-05, "loss": 0.5214, "step": 1078 }, { "epoch": 0.7, "learning_rate": 1.793070175718216e-05, "loss": 0.5383, "step": 1079 }, { "epoch": 0.7, "learning_rate": 1.7926451073390612e-05, "loss": 0.5209, "step": 1080 }, { "epoch": 0.7, "learning_rate": 1.7922196533201967e-05, "loss": 0.5077, "step": 1081 }, { "epoch": 0.7, "learning_rate": 1.7917938138686152e-05, "loss": 0.5375, "step": 1082 }, { "epoch": 0.7, "learning_rate": 1.7913675891914976e-05, "loss": 0.5061, "step": 1083 }, { "epoch": 0.7, "learning_rate": 1.7909409794962115e-05, "loss": 0.5184, "step": 1084 }, { "epoch": 0.7, "learning_rate": 1.790513984990312e-05, "loss": 0.4921, "step": 1085 }, { "epoch": 0.7, "learning_rate": 1.7900866058815424e-05, "loss": 0.5379, "step": 1086 }, { "epoch": 0.7, "learning_rate": 1.789658842377831e-05, "loss": 0.5047, "step": 1087 }, { "epoch": 0.7, "learning_rate": 1.7892306946872952e-05, "loss": 0.471, "step": 1088 }, { "epoch": 0.7, "learning_rate": 1.7888021630182385e-05, "loss": 0.5089, "step": 1089 }, { "epoch": 0.7, "learning_rate": 1.7883732475791512e-05, "loss": 0.4987, "step": 1090 }, { "epoch": 0.7, "learning_rate": 1.78794394857871e-05, "loss": 0.4782, "step": 1091 }, { "epoch": 0.71, "learning_rate": 1.7875142662257788e-05, "loss": 0.5214, "step": 1092 }, { "epoch": 0.71, "learning_rate": 1.7870842007294075e-05, "loss": 0.5142, "step": 1093 }, { "epoch": 0.71, "learning_rate": 1.786653752298833e-05, "loss": 0.5426, "step": 1094 }, { "epoch": 0.71, "learning_rate": 1.7862229211434778e-05, "loss": 0.5114, "step": 1095 }, { "epoch": 0.71, "learning_rate": 1.7857917074729513e-05, "loss": 0.5259, "step": 1096 }, { "epoch": 0.71, "learning_rate": 1.7853601114970483e-05, "loss": 0.4828, "step": 1097 }, { "epoch": 0.71, "learning_rate": 1.7849281334257504e-05, "loss": 0.5044, "step": 1098 }, { "epoch": 0.71, "learning_rate": 1.7844957734692247e-05, "loss": 0.5169, "step": 1099 }, { "epoch": 0.71, "learning_rate": 1.7840630318378233e-05, "loss": 0.5216, "step": 1100 }, { "epoch": 0.71, "learning_rate": 1.7836299087420856e-05, "loss": 0.5524, "step": 1101 }, { "epoch": 0.71, "learning_rate": 1.7831964043927355e-05, "loss": 0.5026, "step": 1102 }, { "epoch": 0.71, "learning_rate": 1.7827625190006824e-05, "loss": 0.5217, "step": 1103 }, { "epoch": 0.71, "learning_rate": 1.7823282527770214e-05, "loss": 0.5167, "step": 1104 }, { "epoch": 0.71, "learning_rate": 1.7818936059330333e-05, "loss": 0.4926, "step": 1105 }, { "epoch": 0.71, "learning_rate": 1.7814585786801826e-05, "loss": 0.5173, "step": 1106 }, { "epoch": 0.71, "learning_rate": 1.781023171230121e-05, "loss": 0.5072, "step": 1107 }, { "epoch": 0.72, "learning_rate": 1.7805873837946833e-05, "loss": 0.5005, "step": 1108 }, { "epoch": 0.72, "learning_rate": 1.78015121658589e-05, "loss": 0.5084, "step": 1109 }, { "epoch": 0.72, "learning_rate": 1.779714669815946e-05, "loss": 0.4994, "step": 1110 }, { "epoch": 0.72, "learning_rate": 1.7792777436972423e-05, "loss": 0.5261, "step": 1111 }, { "epoch": 0.72, "learning_rate": 1.778840438442352e-05, "loss": 0.5068, "step": 1112 }, { "epoch": 0.72, "learning_rate": 1.7784027542640346e-05, "loss": 0.5095, "step": 1113 }, { "epoch": 0.72, "learning_rate": 1.7779646913752334e-05, "loss": 0.5227, "step": 1114 }, { "epoch": 0.72, "learning_rate": 1.7775262499890753e-05, "loss": 0.5273, "step": 1115 }, { "epoch": 0.72, "learning_rate": 1.7770874303188727e-05, "loss": 0.5188, "step": 1116 }, { "epoch": 0.72, "learning_rate": 1.7766482325781205e-05, "loss": 0.504, "step": 1117 }, { "epoch": 0.72, "learning_rate": 1.776208656980499e-05, "loss": 0.5027, "step": 1118 }, { "epoch": 0.72, "learning_rate": 1.775768703739871e-05, "loss": 0.5304, "step": 1119 }, { "epoch": 0.72, "learning_rate": 1.7753283730702837e-05, "loss": 0.5231, "step": 1120 }, { "epoch": 0.72, "learning_rate": 1.7748876651859686e-05, "loss": 0.5142, "step": 1121 }, { "epoch": 0.72, "learning_rate": 1.7744465803013394e-05, "loss": 0.5475, "step": 1122 }, { "epoch": 0.73, "learning_rate": 1.7740051186309937e-05, "loss": 0.538, "step": 1123 }, { "epoch": 0.73, "learning_rate": 1.7735632803897135e-05, "loss": 0.5015, "step": 1124 }, { "epoch": 0.73, "learning_rate": 1.773121065792462e-05, "loss": 0.5445, "step": 1125 }, { "epoch": 0.73, "learning_rate": 1.7726784750543867e-05, "loss": 0.5315, "step": 1126 }, { "epoch": 0.73, "learning_rate": 1.7722355083908187e-05, "loss": 0.5043, "step": 1127 }, { "epoch": 0.73, "learning_rate": 1.7717921660172708e-05, "loss": 0.5245, "step": 1128 }, { "epoch": 0.73, "learning_rate": 1.771348448149439e-05, "loss": 0.5187, "step": 1129 }, { "epoch": 0.73, "learning_rate": 1.770904355003202e-05, "loss": 0.54, "step": 1130 }, { "epoch": 0.73, "learning_rate": 1.7704598867946212e-05, "loss": 0.516, "step": 1131 }, { "epoch": 0.73, "learning_rate": 1.7700150437399405e-05, "loss": 0.5358, "step": 1132 }, { "epoch": 0.73, "learning_rate": 1.769569826055586e-05, "loss": 0.5294, "step": 1133 }, { "epoch": 0.73, "learning_rate": 1.7691242339581664e-05, "loss": 0.4928, "step": 1134 }, { "epoch": 0.73, "learning_rate": 1.768678267664472e-05, "loss": 0.49, "step": 1135 }, { "epoch": 0.73, "learning_rate": 1.7682319273914755e-05, "loss": 0.4948, "step": 1136 }, { "epoch": 0.73, "learning_rate": 1.7677852133563314e-05, "loss": 0.5264, "step": 1137 }, { "epoch": 0.73, "learning_rate": 1.767338125776376e-05, "loss": 0.5141, "step": 1138 }, { "epoch": 0.74, "learning_rate": 1.766890664869128e-05, "loss": 0.5175, "step": 1139 }, { "epoch": 0.74, "learning_rate": 1.766442830852287e-05, "loss": 0.5031, "step": 1140 }, { "epoch": 0.74, "learning_rate": 1.765994623943734e-05, "loss": 0.516, "step": 1141 }, { "epoch": 0.74, "learning_rate": 1.7655460443615327e-05, "loss": 0.4813, "step": 1142 }, { "epoch": 0.74, "learning_rate": 1.765097092323926e-05, "loss": 0.5086, "step": 1143 }, { "epoch": 0.74, "learning_rate": 1.76464776804934e-05, "loss": 0.5578, "step": 1144 }, { "epoch": 0.74, "learning_rate": 1.7641980717563806e-05, "loss": 0.5103, "step": 1145 }, { "epoch": 0.74, "learning_rate": 1.7637480036638356e-05, "loss": 0.5266, "step": 1146 }, { "epoch": 0.74, "learning_rate": 1.7632975639906727e-05, "loss": 0.5198, "step": 1147 }, { "epoch": 0.74, "learning_rate": 1.7628467529560417e-05, "loss": 0.5132, "step": 1148 }, { "epoch": 0.74, "learning_rate": 1.7623955707792717e-05, "loss": 0.5124, "step": 1149 }, { "epoch": 0.74, "learning_rate": 1.7619440176798733e-05, "loss": 0.5161, "step": 1150 }, { "epoch": 0.74, "learning_rate": 1.761492093877537e-05, "loss": 0.511, "step": 1151 }, { "epoch": 0.74, "learning_rate": 1.7610397995921348e-05, "loss": 0.4985, "step": 1152 }, { "epoch": 0.74, "learning_rate": 1.7605871350437165e-05, "loss": 0.527, "step": 1153 }, { "epoch": 0.75, "learning_rate": 1.760134100452515e-05, "loss": 0.5057, "step": 1154 }, { "epoch": 0.75, "learning_rate": 1.7596806960389415e-05, "loss": 0.5129, "step": 1155 }, { "epoch": 0.75, "learning_rate": 1.759226922023587e-05, "loss": 0.5186, "step": 1156 }, { "epoch": 0.75, "learning_rate": 1.758772778627223e-05, "loss": 0.543, "step": 1157 }, { "epoch": 0.75, "learning_rate": 1.758318266070801e-05, "loss": 0.5008, "step": 1158 }, { "epoch": 0.75, "learning_rate": 1.757863384575451e-05, "loss": 0.4937, "step": 1159 }, { "epoch": 0.75, "learning_rate": 1.7574081343624827e-05, "loss": 0.5187, "step": 1160 }, { "epoch": 0.75, "learning_rate": 1.7569525156533864e-05, "loss": 0.5193, "step": 1161 }, { "epoch": 0.75, "learning_rate": 1.756496528669831e-05, "loss": 0.4996, "step": 1162 }, { "epoch": 0.75, "learning_rate": 1.7560401736336642e-05, "loss": 0.503, "step": 1163 }, { "epoch": 0.75, "learning_rate": 1.7555834507669124e-05, "loss": 0.5226, "step": 1164 }, { "epoch": 0.75, "learning_rate": 1.7551263602917823e-05, "loss": 0.4975, "step": 1165 }, { "epoch": 0.75, "learning_rate": 1.7546689024306585e-05, "loss": 0.5039, "step": 1166 }, { "epoch": 0.75, "learning_rate": 1.7542110774061045e-05, "loss": 0.5548, "step": 1167 }, { "epoch": 0.75, "learning_rate": 1.7537528854408625e-05, "loss": 0.5324, "step": 1168 }, { "epoch": 0.75, "learning_rate": 1.7532943267578532e-05, "loss": 0.5187, "step": 1169 }, { "epoch": 0.76, "learning_rate": 1.752835401580176e-05, "loss": 0.5137, "step": 1170 }, { "epoch": 0.76, "learning_rate": 1.752376110131108e-05, "loss": 0.5121, "step": 1171 }, { "epoch": 0.76, "learning_rate": 1.751916452634105e-05, "loss": 0.5006, "step": 1172 }, { "epoch": 0.76, "learning_rate": 1.7514564293128008e-05, "loss": 0.5244, "step": 1173 }, { "epoch": 0.76, "learning_rate": 1.750996040391007e-05, "loss": 0.5006, "step": 1174 }, { "epoch": 0.76, "learning_rate": 1.750535286092713e-05, "loss": 0.5276, "step": 1175 }, { "epoch": 0.76, "learning_rate": 1.7500741666420863e-05, "loss": 0.4907, "step": 1176 }, { "epoch": 0.76, "learning_rate": 1.7496126822634723e-05, "loss": 0.5101, "step": 1177 }, { "epoch": 0.76, "learning_rate": 1.7491508331813928e-05, "loss": 0.5108, "step": 1178 }, { "epoch": 0.76, "learning_rate": 1.7486886196205487e-05, "loss": 0.5123, "step": 1179 }, { "epoch": 0.76, "learning_rate": 1.7482260418058167e-05, "loss": 0.5297, "step": 1180 }, { "epoch": 0.76, "learning_rate": 1.747763099962251e-05, "loss": 0.5126, "step": 1181 }, { "epoch": 0.76, "learning_rate": 1.747299794315084e-05, "loss": 0.5374, "step": 1182 }, { "epoch": 0.76, "learning_rate": 1.746836125089724e-05, "loss": 0.4925, "step": 1183 }, { "epoch": 0.76, "learning_rate": 1.7463720925117565e-05, "loss": 0.5104, "step": 1184 }, { "epoch": 0.77, "learning_rate": 1.745907696806944e-05, "loss": 0.5239, "step": 1185 }, { "epoch": 0.77, "learning_rate": 1.7454429382012255e-05, "loss": 0.5216, "step": 1186 }, { "epoch": 0.77, "learning_rate": 1.744977816920716e-05, "loss": 0.5455, "step": 1187 }, { "epoch": 0.77, "learning_rate": 1.744512333191708e-05, "loss": 0.517, "step": 1188 }, { "epoch": 0.77, "learning_rate": 1.74404648724067e-05, "loss": 0.5106, "step": 1189 }, { "epoch": 0.77, "learning_rate": 1.743580279294246e-05, "loss": 0.4949, "step": 1190 }, { "epoch": 0.77, "learning_rate": 1.7431137095792566e-05, "loss": 0.4996, "step": 1191 }, { "epoch": 0.77, "learning_rate": 1.7426467783226992e-05, "loss": 0.5081, "step": 1192 }, { "epoch": 0.77, "learning_rate": 1.7421794857517457e-05, "loss": 0.5195, "step": 1193 }, { "epoch": 0.77, "learning_rate": 1.7417118320937452e-05, "loss": 0.5183, "step": 1194 }, { "epoch": 0.77, "learning_rate": 1.7412438175762206e-05, "loss": 0.5022, "step": 1195 }, { "epoch": 0.77, "learning_rate": 1.7407754424268727e-05, "loss": 0.5088, "step": 1196 }, { "epoch": 0.77, "learning_rate": 1.740306706873576e-05, "loss": 0.5048, "step": 1197 }, { "epoch": 0.77, "learning_rate": 1.739837611144381e-05, "loss": 0.5093, "step": 1198 }, { "epoch": 0.77, "learning_rate": 1.7393681554675135e-05, "loss": 0.5248, "step": 1199 }, { "epoch": 0.77, "learning_rate": 1.7388983400713736e-05, "loss": 0.4883, "step": 1200 }, { "epoch": 0.78, "learning_rate": 1.738428165184538e-05, "loss": 0.5036, "step": 1201 }, { "epoch": 0.78, "learning_rate": 1.7379576310357568e-05, "loss": 0.5281, "step": 1202 }, { "epoch": 0.78, "learning_rate": 1.7374867378539558e-05, "loss": 0.5454, "step": 1203 }, { "epoch": 0.78, "learning_rate": 1.7370154858682347e-05, "loss": 0.5094, "step": 1204 }, { "epoch": 0.78, "learning_rate": 1.7365438753078686e-05, "loss": 0.5013, "step": 1205 }, { "epoch": 0.78, "learning_rate": 1.7360719064023067e-05, "loss": 0.5153, "step": 1206 }, { "epoch": 0.78, "learning_rate": 1.735599579381172e-05, "loss": 0.5335, "step": 1207 }, { "epoch": 0.78, "learning_rate": 1.7351268944742626e-05, "loss": 0.509, "step": 1208 }, { "epoch": 0.78, "learning_rate": 1.7346538519115507e-05, "loss": 0.4957, "step": 1209 }, { "epoch": 0.78, "learning_rate": 1.7341804519231815e-05, "loss": 0.5072, "step": 1210 }, { "epoch": 0.78, "learning_rate": 1.7337066947394755e-05, "loss": 0.5127, "step": 1211 }, { "epoch": 0.78, "learning_rate": 1.7332325805909256e-05, "loss": 0.4991, "step": 1212 }, { "epoch": 0.78, "learning_rate": 1.732758109708199e-05, "loss": 0.5347, "step": 1213 }, { "epoch": 0.78, "learning_rate": 1.732283282322137e-05, "loss": 0.5175, "step": 1214 }, { "epoch": 0.78, "learning_rate": 1.7318080986637538e-05, "loss": 0.4978, "step": 1215 }, { "epoch": 0.79, "learning_rate": 1.7313325589642363e-05, "loss": 0.4965, "step": 1216 }, { "epoch": 0.79, "learning_rate": 1.730856663454946e-05, "loss": 0.5195, "step": 1217 }, { "epoch": 0.79, "learning_rate": 1.7303804123674165e-05, "loss": 0.5201, "step": 1218 }, { "epoch": 0.79, "learning_rate": 1.7299038059333545e-05, "loss": 0.5059, "step": 1219 }, { "epoch": 0.79, "learning_rate": 1.7294268443846403e-05, "loss": 0.5275, "step": 1220 }, { "epoch": 0.79, "learning_rate": 1.7289495279533257e-05, "loss": 0.4882, "step": 1221 }, { "epoch": 0.79, "learning_rate": 1.7284718568716362e-05, "loss": 0.5261, "step": 1222 }, { "epoch": 0.79, "learning_rate": 1.7279938313719703e-05, "loss": 0.518, "step": 1223 }, { "epoch": 0.79, "learning_rate": 1.727515451686897e-05, "loss": 0.5007, "step": 1224 }, { "epoch": 0.79, "learning_rate": 1.7270367180491594e-05, "loss": 0.5089, "step": 1225 }, { "epoch": 0.79, "learning_rate": 1.726557630691672e-05, "loss": 0.5096, "step": 1226 }, { "epoch": 0.79, "learning_rate": 1.726078189847522e-05, "loss": 0.4808, "step": 1227 }, { "epoch": 0.79, "learning_rate": 1.7255983957499676e-05, "loss": 0.5111, "step": 1228 }, { "epoch": 0.79, "learning_rate": 1.7251182486324403e-05, "loss": 0.5067, "step": 1229 }, { "epoch": 0.79, "learning_rate": 1.7246377487285415e-05, "loss": 0.5149, "step": 1230 }, { "epoch": 0.79, "learning_rate": 1.724156896272046e-05, "loss": 0.5086, "step": 1231 }, { "epoch": 0.8, "learning_rate": 1.7236756914968985e-05, "loss": 0.518, "step": 1232 }, { "epoch": 0.8, "learning_rate": 1.723194134637217e-05, "loss": 0.4971, "step": 1233 }, { "epoch": 0.8, "learning_rate": 1.722712225927289e-05, "loss": 0.5038, "step": 1234 }, { "epoch": 0.8, "learning_rate": 1.7222299656015742e-05, "loss": 0.5244, "step": 1235 }, { "epoch": 0.8, "learning_rate": 1.7217473538947032e-05, "loss": 0.5114, "step": 1236 }, { "epoch": 0.8, "learning_rate": 1.7212643910414776e-05, "loss": 0.5074, "step": 1237 }, { "epoch": 0.8, "learning_rate": 1.7207810772768692e-05, "loss": 0.4893, "step": 1238 }, { "epoch": 0.8, "learning_rate": 1.7202974128360213e-05, "loss": 0.5326, "step": 1239 }, { "epoch": 0.8, "learning_rate": 1.719813397954248e-05, "loss": 0.5206, "step": 1240 }, { "epoch": 0.8, "learning_rate": 1.7193290328670328e-05, "loss": 0.5236, "step": 1241 }, { "epoch": 0.8, "learning_rate": 1.7188443178100306e-05, "loss": 0.5103, "step": 1242 }, { "epoch": 0.8, "learning_rate": 1.7183592530190662e-05, "loss": 0.5098, "step": 1243 }, { "epoch": 0.8, "learning_rate": 1.7178738387301342e-05, "loss": 0.5331, "step": 1244 }, { "epoch": 0.8, "learning_rate": 1.7173880751794e-05, "loss": 0.5452, "step": 1245 }, { "epoch": 0.8, "learning_rate": 1.7169019626031985e-05, "loss": 0.5106, "step": 1246 }, { "epoch": 0.81, "learning_rate": 1.716415501238034e-05, "loss": 0.5172, "step": 1247 }, { "epoch": 0.81, "learning_rate": 1.7159286913205813e-05, "loss": 0.4953, "step": 1248 }, { "epoch": 0.81, "learning_rate": 1.715441533087684e-05, "loss": 0.5438, "step": 1249 }, { "epoch": 0.81, "learning_rate": 1.7149540267763566e-05, "loss": 0.5049, "step": 1250 }, { "epoch": 0.81, "learning_rate": 1.7144661726237804e-05, "loss": 0.5036, "step": 1251 }, { "epoch": 0.81, "learning_rate": 1.7139779708673084e-05, "loss": 0.5386, "step": 1252 }, { "epoch": 0.81, "learning_rate": 1.7134894217444617e-05, "loss": 0.5092, "step": 1253 }, { "epoch": 0.81, "learning_rate": 1.71300052549293e-05, "loss": 0.5154, "step": 1254 }, { "epoch": 0.81, "learning_rate": 1.7125112823505736e-05, "loss": 0.5381, "step": 1255 }, { "epoch": 0.81, "learning_rate": 1.7120216925554185e-05, "loss": 0.5169, "step": 1256 }, { "epoch": 0.81, "learning_rate": 1.711531756345662e-05, "loss": 0.5025, "step": 1257 }, { "epoch": 0.81, "learning_rate": 1.7110414739596697e-05, "loss": 0.5171, "step": 1258 }, { "epoch": 0.81, "learning_rate": 1.7105508456359742e-05, "loss": 0.5183, "step": 1259 }, { "epoch": 0.81, "learning_rate": 1.7100598716132775e-05, "loss": 0.5098, "step": 1260 }, { "epoch": 0.81, "learning_rate": 1.7095685521304498e-05, "loss": 0.5048, "step": 1261 }, { "epoch": 0.81, "learning_rate": 1.7090768874265285e-05, "loss": 0.5032, "step": 1262 }, { "epoch": 0.82, "learning_rate": 1.70858487774072e-05, "loss": 0.5145, "step": 1263 }, { "epoch": 0.82, "learning_rate": 1.708092523312398e-05, "loss": 0.5152, "step": 1264 }, { "epoch": 0.82, "learning_rate": 1.7075998243811043e-05, "loss": 0.5217, "step": 1265 }, { "epoch": 0.82, "learning_rate": 1.7071067811865477e-05, "loss": 0.5208, "step": 1266 }, { "epoch": 0.82, "learning_rate": 1.706613393968605e-05, "loss": 0.5141, "step": 1267 }, { "epoch": 0.82, "learning_rate": 1.7061196629673198e-05, "loss": 0.5043, "step": 1268 }, { "epoch": 0.82, "learning_rate": 1.705625588422904e-05, "loss": 0.5179, "step": 1269 }, { "epoch": 0.82, "learning_rate": 1.7051311705757353e-05, "loss": 0.5049, "step": 1270 }, { "epoch": 0.82, "learning_rate": 1.7046364096663598e-05, "loss": 0.5058, "step": 1271 }, { "epoch": 0.82, "learning_rate": 1.7041413059354893e-05, "loss": 0.4851, "step": 1272 }, { "epoch": 0.82, "learning_rate": 1.703645859624003e-05, "loss": 0.5148, "step": 1273 }, { "epoch": 0.82, "learning_rate": 1.703150070972947e-05, "loss": 0.5357, "step": 1274 }, { "epoch": 0.82, "learning_rate": 1.702653940223534e-05, "loss": 0.518, "step": 1275 }, { "epoch": 0.82, "learning_rate": 1.7021574676171418e-05, "loss": 0.4938, "step": 1276 }, { "epoch": 0.82, "learning_rate": 1.701660653395316e-05, "loss": 0.5286, "step": 1277 }, { "epoch": 0.83, "learning_rate": 1.7011634977997683e-05, "loss": 0.5147, "step": 1278 }, { "epoch": 0.83, "learning_rate": 1.7006660010723754e-05, "loss": 0.5299, "step": 1279 }, { "epoch": 0.83, "learning_rate": 1.7001681634551813e-05, "loss": 0.5135, "step": 1280 }, { "epoch": 0.83, "learning_rate": 1.6996699851903942e-05, "loss": 0.5311, "step": 1281 }, { "epoch": 0.83, "learning_rate": 1.6991714665203905e-05, "loss": 0.5238, "step": 1282 }, { "epoch": 0.83, "learning_rate": 1.69867260768771e-05, "loss": 0.4854, "step": 1283 }, { "epoch": 0.83, "learning_rate": 1.6981734089350585e-05, "loss": 0.4983, "step": 1284 }, { "epoch": 0.83, "learning_rate": 1.6976738705053078e-05, "loss": 0.5203, "step": 1285 }, { "epoch": 0.83, "learning_rate": 1.6971739926414946e-05, "loss": 0.4819, "step": 1286 }, { "epoch": 0.83, "learning_rate": 1.6966737755868207e-05, "loss": 0.5458, "step": 1287 }, { "epoch": 0.83, "learning_rate": 1.696173219584653e-05, "loss": 0.5173, "step": 1288 }, { "epoch": 0.83, "learning_rate": 1.6956723248785235e-05, "loss": 0.4975, "step": 1289 }, { "epoch": 0.83, "learning_rate": 1.695171091712128e-05, "loss": 0.4983, "step": 1290 }, { "epoch": 0.83, "learning_rate": 1.6946695203293286e-05, "loss": 0.5362, "step": 1291 }, { "epoch": 0.83, "learning_rate": 1.6941676109741506e-05, "loss": 0.504, "step": 1292 }, { "epoch": 0.83, "learning_rate": 1.6936653638907846e-05, "loss": 0.5189, "step": 1293 }, { "epoch": 0.84, "learning_rate": 1.6931627793235845e-05, "loss": 0.5047, "step": 1294 }, { "epoch": 0.84, "learning_rate": 1.6926598575170695e-05, "loss": 0.5084, "step": 1295 }, { "epoch": 0.84, "learning_rate": 1.6921565987159226e-05, "loss": 0.5248, "step": 1296 }, { "epoch": 0.84, "learning_rate": 1.69165300316499e-05, "loss": 0.5173, "step": 1297 }, { "epoch": 0.84, "learning_rate": 1.6911490711092824e-05, "loss": 0.5361, "step": 1298 }, { "epoch": 0.84, "learning_rate": 1.6906448027939744e-05, "loss": 0.4966, "step": 1299 }, { "epoch": 0.84, "learning_rate": 1.6901401984644034e-05, "loss": 0.5204, "step": 1300 }, { "epoch": 0.84, "learning_rate": 1.6896352583660712e-05, "loss": 0.5283, "step": 1301 }, { "epoch": 0.84, "learning_rate": 1.6891299827446428e-05, "loss": 0.5093, "step": 1302 }, { "epoch": 0.84, "learning_rate": 1.6886243718459457e-05, "loss": 0.5409, "step": 1303 }, { "epoch": 0.84, "learning_rate": 1.6881184259159708e-05, "loss": 0.5153, "step": 1304 }, { "epoch": 0.84, "learning_rate": 1.687612145200873e-05, "loss": 0.5153, "step": 1305 }, { "epoch": 0.84, "learning_rate": 1.6871055299469686e-05, "loss": 0.5058, "step": 1306 }, { "epoch": 0.84, "learning_rate": 1.6865985804007373e-05, "loss": 0.5116, "step": 1307 }, { "epoch": 0.84, "learning_rate": 1.686091296808822e-05, "loss": 0.5132, "step": 1308 }, { "epoch": 0.85, "learning_rate": 1.685583679418027e-05, "loss": 0.5255, "step": 1309 }, { "epoch": 0.85, "learning_rate": 1.6850757284753202e-05, "loss": 0.5406, "step": 1310 }, { "epoch": 0.85, "learning_rate": 1.6845674442278307e-05, "loss": 0.4971, "step": 1311 }, { "epoch": 0.85, "learning_rate": 1.6840588269228507e-05, "loss": 0.5071, "step": 1312 }, { "epoch": 0.85, "learning_rate": 1.6835498768078333e-05, "loss": 0.5115, "step": 1313 }, { "epoch": 0.85, "learning_rate": 1.6830405941303948e-05, "loss": 0.5121, "step": 1314 }, { "epoch": 0.85, "learning_rate": 1.6825309791383128e-05, "loss": 0.5044, "step": 1315 }, { "epoch": 0.85, "learning_rate": 1.682021032079526e-05, "loss": 0.5117, "step": 1316 }, { "epoch": 0.85, "learning_rate": 1.6815107532021354e-05, "loss": 0.4931, "step": 1317 }, { "epoch": 0.85, "learning_rate": 1.681000142754403e-05, "loss": 0.4823, "step": 1318 }, { "epoch": 0.85, "learning_rate": 1.6804892009847528e-05, "loss": 0.5225, "step": 1319 }, { "epoch": 0.85, "learning_rate": 1.6799779281417685e-05, "loss": 0.5079, "step": 1320 }, { "epoch": 0.85, "learning_rate": 1.679466324474197e-05, "loss": 0.4893, "step": 1321 }, { "epoch": 0.85, "learning_rate": 1.6789543902309443e-05, "loss": 0.5187, "step": 1322 }, { "epoch": 0.85, "learning_rate": 1.6784421256610778e-05, "loss": 0.4944, "step": 1323 }, { "epoch": 0.85, "learning_rate": 1.6779295310138264e-05, "loss": 0.5076, "step": 1324 }, { "epoch": 0.86, "learning_rate": 1.6774166065385787e-05, "loss": 0.5204, "step": 1325 }, { "epoch": 0.86, "learning_rate": 1.6769033524848833e-05, "loss": 0.52, "step": 1326 }, { "epoch": 0.86, "learning_rate": 1.6763897691024514e-05, "loss": 0.5268, "step": 1327 }, { "epoch": 0.86, "learning_rate": 1.6758758566411516e-05, "loss": 0.5077, "step": 1328 }, { "epoch": 0.86, "learning_rate": 1.6753616153510145e-05, "loss": 0.5178, "step": 1329 }, { "epoch": 0.86, "learning_rate": 1.6748470454822295e-05, "loss": 0.5056, "step": 1330 }, { "epoch": 0.86, "learning_rate": 1.674332147285147e-05, "loss": 0.493, "step": 1331 }, { "epoch": 0.86, "learning_rate": 1.6738169210102765e-05, "loss": 0.5219, "step": 1332 }, { "epoch": 0.86, "learning_rate": 1.6733013669082868e-05, "loss": 0.5168, "step": 1333 }, { "epoch": 0.86, "learning_rate": 1.6727854852300073e-05, "loss": 0.5074, "step": 1334 }, { "epoch": 0.86, "learning_rate": 1.672269276226425e-05, "loss": 0.5129, "step": 1335 }, { "epoch": 0.86, "learning_rate": 1.6717527401486882e-05, "loss": 0.481, "step": 1336 }, { "epoch": 0.86, "learning_rate": 1.6712358772481032e-05, "loss": 0.5219, "step": 1337 }, { "epoch": 0.86, "learning_rate": 1.670718687776135e-05, "loss": 0.5052, "step": 1338 }, { "epoch": 0.86, "learning_rate": 1.6702011719844083e-05, "loss": 0.5042, "step": 1339 }, { "epoch": 0.87, "learning_rate": 1.669683330124706e-05, "loss": 0.4991, "step": 1340 }, { "epoch": 0.87, "learning_rate": 1.66916516244897e-05, "loss": 0.5027, "step": 1341 }, { "epoch": 0.87, "learning_rate": 1.6686466692093007e-05, "loss": 0.5023, "step": 1342 }, { "epoch": 0.87, "learning_rate": 1.6681278506579562e-05, "loss": 0.5302, "step": 1343 }, { "epoch": 0.87, "learning_rate": 1.667608707047354e-05, "loss": 0.5045, "step": 1344 }, { "epoch": 0.87, "learning_rate": 1.6670892386300687e-05, "loss": 0.543, "step": 1345 }, { "epoch": 0.87, "learning_rate": 1.6665694456588335e-05, "loss": 0.5269, "step": 1346 }, { "epoch": 0.87, "learning_rate": 1.6660493283865394e-05, "loss": 0.5089, "step": 1347 }, { "epoch": 0.87, "learning_rate": 1.6655288870662354e-05, "loss": 0.508, "step": 1348 }, { "epoch": 0.87, "learning_rate": 1.665008121951128e-05, "loss": 0.5125, "step": 1349 }, { "epoch": 0.87, "learning_rate": 1.6644870332945807e-05, "loss": 0.4898, "step": 1350 }, { "epoch": 0.87, "learning_rate": 1.6639656213501155e-05, "loss": 0.5182, "step": 1351 }, { "epoch": 0.87, "learning_rate": 1.6634438863714108e-05, "loss": 0.523, "step": 1352 }, { "epoch": 0.87, "learning_rate": 1.662921828612302e-05, "loss": 0.5151, "step": 1353 }, { "epoch": 0.87, "learning_rate": 1.6623994483267823e-05, "loss": 0.5171, "step": 1354 }, { "epoch": 0.87, "learning_rate": 1.6618767457690012e-05, "loss": 0.5299, "step": 1355 }, { "epoch": 0.88, "learning_rate": 1.661353721193266e-05, "loss": 0.5199, "step": 1356 }, { "epoch": 0.88, "learning_rate": 1.6608303748540393e-05, "loss": 0.5336, "step": 1357 }, { "epoch": 0.88, "learning_rate": 1.6603067070059413e-05, "loss": 0.5143, "step": 1358 }, { "epoch": 0.88, "learning_rate": 1.6597827179037475e-05, "loss": 0.5243, "step": 1359 }, { "epoch": 0.88, "learning_rate": 1.6592584078023915e-05, "loss": 0.4958, "step": 1360 }, { "epoch": 0.88, "learning_rate": 1.658733776956961e-05, "loss": 0.5049, "step": 1361 }, { "epoch": 0.88, "learning_rate": 1.658208825622701e-05, "loss": 0.5326, "step": 1362 }, { "epoch": 0.88, "learning_rate": 1.6576835540550128e-05, "loss": 0.5201, "step": 1363 }, { "epoch": 0.88, "learning_rate": 1.657157962509452e-05, "loss": 0.4997, "step": 1364 }, { "epoch": 0.88, "learning_rate": 1.656632051241731e-05, "loss": 0.5272, "step": 1365 }, { "epoch": 0.88, "learning_rate": 1.656105820507718e-05, "loss": 0.5172, "step": 1366 }, { "epoch": 0.88, "learning_rate": 1.6555792705634352e-05, "loss": 0.4955, "step": 1367 }, { "epoch": 0.88, "learning_rate": 1.6550524016650617e-05, "loss": 0.5285, "step": 1368 }, { "epoch": 0.88, "learning_rate": 1.6545252140689307e-05, "loss": 0.5164, "step": 1369 }, { "epoch": 0.88, "learning_rate": 1.6539977080315313e-05, "loss": 0.5184, "step": 1370 }, { "epoch": 0.89, "learning_rate": 1.6534698838095066e-05, "loss": 0.5073, "step": 1371 }, { "epoch": 0.89, "learning_rate": 1.652941741659655e-05, "loss": 0.5208, "step": 1372 }, { "epoch": 0.89, "learning_rate": 1.65241328183893e-05, "loss": 0.4928, "step": 1373 }, { "epoch": 0.89, "learning_rate": 1.651884504604439e-05, "loss": 0.5012, "step": 1374 }, { "epoch": 0.89, "learning_rate": 1.6513554102134438e-05, "loss": 0.5292, "step": 1375 }, { "epoch": 0.89, "learning_rate": 1.650825998923361e-05, "loss": 0.4998, "step": 1376 }, { "epoch": 0.89, "learning_rate": 1.6502962709917612e-05, "loss": 0.5095, "step": 1377 }, { "epoch": 0.89, "learning_rate": 1.6497662266763685e-05, "loss": 0.5255, "step": 1378 }, { "epoch": 0.89, "learning_rate": 1.649235866235062e-05, "loss": 0.488, "step": 1379 }, { "epoch": 0.89, "learning_rate": 1.6487051899258738e-05, "loss": 0.5265, "step": 1380 }, { "epoch": 0.89, "learning_rate": 1.6481741980069898e-05, "loss": 0.5199, "step": 1381 }, { "epoch": 0.89, "learning_rate": 1.6476428907367497e-05, "loss": 0.5006, "step": 1382 }, { "epoch": 0.89, "learning_rate": 1.6471112683736462e-05, "loss": 0.527, "step": 1383 }, { "epoch": 0.89, "learning_rate": 1.6465793311763255e-05, "loss": 0.4793, "step": 1384 }, { "epoch": 0.89, "learning_rate": 1.6460470794035875e-05, "loss": 0.4945, "step": 1385 }, { "epoch": 0.89, "learning_rate": 1.6455145133143843e-05, "loss": 0.4958, "step": 1386 }, { "epoch": 0.9, "learning_rate": 1.6449816331678214e-05, "loss": 0.5085, "step": 1387 }, { "epoch": 0.9, "learning_rate": 1.6444484392231574e-05, "loss": 0.535, "step": 1388 }, { "epoch": 0.9, "learning_rate": 1.6439149317398025e-05, "loss": 0.5011, "step": 1389 }, { "epoch": 0.9, "learning_rate": 1.6433811109773202e-05, "loss": 0.5181, "step": 1390 }, { "epoch": 0.9, "learning_rate": 1.6428469771954267e-05, "loss": 0.5112, "step": 1391 }, { "epoch": 0.9, "learning_rate": 1.6423125306539903e-05, "loss": 0.4876, "step": 1392 }, { "epoch": 0.9, "learning_rate": 1.6417777716130305e-05, "loss": 0.4965, "step": 1393 }, { "epoch": 0.9, "learning_rate": 1.64124270033272e-05, "loss": 0.5294, "step": 1394 }, { "epoch": 0.9, "learning_rate": 1.6407073170733836e-05, "loss": 0.5222, "step": 1395 }, { "epoch": 0.9, "learning_rate": 1.6401716220954968e-05, "loss": 0.5012, "step": 1396 }, { "epoch": 0.9, "learning_rate": 1.639635615659687e-05, "loss": 0.5064, "step": 1397 }, { "epoch": 0.9, "learning_rate": 1.639099298026734e-05, "loss": 0.5192, "step": 1398 }, { "epoch": 0.9, "learning_rate": 1.638562669457568e-05, "loss": 0.4917, "step": 1399 }, { "epoch": 0.9, "learning_rate": 1.638025730213271e-05, "loss": 0.5064, "step": 1400 }, { "epoch": 0.9, "learning_rate": 1.637488480555076e-05, "loss": 0.5274, "step": 1401 }, { "epoch": 0.91, "learning_rate": 1.6369509207443676e-05, "loss": 0.5113, "step": 1402 }, { "epoch": 0.91, "learning_rate": 1.6364130510426798e-05, "loss": 0.5173, "step": 1403 }, { "epoch": 0.91, "learning_rate": 1.6358748717116993e-05, "loss": 0.5251, "step": 1404 }, { "epoch": 0.91, "learning_rate": 1.6353363830132618e-05, "loss": 0.5186, "step": 1405 }, { "epoch": 0.91, "learning_rate": 1.6347975852093544e-05, "loss": 0.5136, "step": 1406 }, { "epoch": 0.91, "learning_rate": 1.6342584785621146e-05, "loss": 0.5028, "step": 1407 }, { "epoch": 0.91, "learning_rate": 1.6337190633338294e-05, "loss": 0.5086, "step": 1408 }, { "epoch": 0.91, "learning_rate": 1.6331793397869372e-05, "loss": 0.5086, "step": 1409 }, { "epoch": 0.91, "learning_rate": 1.632639308184025e-05, "loss": 0.507, "step": 1410 }, { "epoch": 0.91, "learning_rate": 1.6320989687878304e-05, "loss": 0.4973, "step": 1411 }, { "epoch": 0.91, "learning_rate": 1.631558321861241e-05, "loss": 0.5337, "step": 1412 }, { "epoch": 0.91, "learning_rate": 1.631017367667294e-05, "loss": 0.5183, "step": 1413 }, { "epoch": 0.91, "learning_rate": 1.6304761064691752e-05, "loss": 0.5192, "step": 1414 }, { "epoch": 0.91, "learning_rate": 1.6299345385302204e-05, "loss": 0.4876, "step": 1415 }, { "epoch": 0.91, "learning_rate": 1.6293926641139154e-05, "loss": 0.5069, "step": 1416 }, { "epoch": 0.91, "learning_rate": 1.628850483483893e-05, "loss": 0.5058, "step": 1417 }, { "epoch": 0.92, "learning_rate": 1.6283079969039376e-05, "loss": 0.5173, "step": 1418 }, { "epoch": 0.92, "learning_rate": 1.6277652046379804e-05, "loss": 0.5151, "step": 1419 }, { "epoch": 0.92, "learning_rate": 1.627222106950102e-05, "loss": 0.4989, "step": 1420 }, { "epoch": 0.92, "learning_rate": 1.6266787041045322e-05, "loss": 0.523, "step": 1421 }, { "epoch": 0.92, "learning_rate": 1.6261349963656482e-05, "loss": 0.5282, "step": 1422 }, { "epoch": 0.92, "learning_rate": 1.6255909839979764e-05, "loss": 0.5199, "step": 1423 }, { "epoch": 0.92, "learning_rate": 1.625046667266191e-05, "loss": 0.5036, "step": 1424 }, { "epoch": 0.92, "learning_rate": 1.6245020464351142e-05, "loss": 0.4837, "step": 1425 }, { "epoch": 0.92, "learning_rate": 1.6239571217697164e-05, "loss": 0.5184, "step": 1426 }, { "epoch": 0.92, "learning_rate": 1.6234118935351162e-05, "loss": 0.5194, "step": 1427 }, { "epoch": 0.92, "learning_rate": 1.6228663619965787e-05, "loss": 0.508, "step": 1428 }, { "epoch": 0.92, "learning_rate": 1.6223205274195178e-05, "loss": 0.4745, "step": 1429 }, { "epoch": 0.92, "learning_rate": 1.621774390069494e-05, "loss": 0.5002, "step": 1430 }, { "epoch": 0.92, "learning_rate": 1.6212279502122158e-05, "loss": 0.5043, "step": 1431 }, { "epoch": 0.92, "learning_rate": 1.620681208113538e-05, "loss": 0.5216, "step": 1432 }, { "epoch": 0.93, "learning_rate": 1.620134164039463e-05, "loss": 0.5129, "step": 1433 }, { "epoch": 0.93, "learning_rate": 1.619586818256141e-05, "loss": 0.5296, "step": 1434 }, { "epoch": 0.93, "learning_rate": 1.6190391710298664e-05, "loss": 0.5182, "step": 1435 }, { "epoch": 0.93, "learning_rate": 1.6184912226270833e-05, "loss": 0.5254, "step": 1436 }, { "epoch": 0.93, "learning_rate": 1.6179429733143807e-05, "loss": 0.5026, "step": 1437 }, { "epoch": 0.93, "learning_rate": 1.6173944233584936e-05, "loss": 0.4971, "step": 1438 }, { "epoch": 0.93, "learning_rate": 1.6168455730263048e-05, "loss": 0.5064, "step": 1439 }, { "epoch": 0.93, "learning_rate": 1.6162964225848416e-05, "loss": 0.49, "step": 1440 }, { "epoch": 0.93, "learning_rate": 1.615746972301279e-05, "loss": 0.5023, "step": 1441 }, { "epoch": 0.93, "learning_rate": 1.6151972224429356e-05, "loss": 0.5344, "step": 1442 }, { "epoch": 0.93, "learning_rate": 1.6146471732772783e-05, "loss": 0.5106, "step": 1443 }, { "epoch": 0.93, "learning_rate": 1.6140968250719177e-05, "loss": 0.5374, "step": 1444 }, { "epoch": 0.93, "learning_rate": 1.613546178094611e-05, "loss": 0.513, "step": 1445 }, { "epoch": 0.93, "learning_rate": 1.6129952326132603e-05, "loss": 0.5195, "step": 1446 }, { "epoch": 0.93, "learning_rate": 1.6124439888959128e-05, "loss": 0.5115, "step": 1447 }, { "epoch": 0.93, "learning_rate": 1.611892447210761e-05, "loss": 0.4948, "step": 1448 }, { "epoch": 0.94, "learning_rate": 1.6113406078261427e-05, "loss": 0.5326, "step": 1449 }, { "epoch": 0.94, "learning_rate": 1.61078847101054e-05, "loss": 0.5116, "step": 1450 }, { "epoch": 0.94, "learning_rate": 1.6102360370325797e-05, "loss": 0.4932, "step": 1451 }, { "epoch": 0.94, "learning_rate": 1.609683306161034e-05, "loss": 0.5219, "step": 1452 }, { "epoch": 0.94, "learning_rate": 1.609130278664818e-05, "loss": 0.5207, "step": 1453 }, { "epoch": 0.94, "learning_rate": 1.6085769548129928e-05, "loss": 0.4943, "step": 1454 }, { "epoch": 0.94, "learning_rate": 1.608023334874762e-05, "loss": 0.5191, "step": 1455 }, { "epoch": 0.94, "learning_rate": 1.6074694191194758e-05, "loss": 0.5183, "step": 1456 }, { "epoch": 0.94, "learning_rate": 1.606915207816625e-05, "loss": 0.5353, "step": 1457 }, { "epoch": 0.94, "learning_rate": 1.6063607012358474e-05, "loss": 0.5192, "step": 1458 }, { "epoch": 0.94, "learning_rate": 1.6058058996469213e-05, "loss": 0.4887, "step": 1459 }, { "epoch": 0.94, "learning_rate": 1.6052508033197713e-05, "loss": 0.5243, "step": 1460 }, { "epoch": 0.94, "learning_rate": 1.604695412524464e-05, "loss": 0.4927, "step": 1461 }, { "epoch": 0.94, "learning_rate": 1.6041397275312102e-05, "loss": 0.5017, "step": 1462 }, { "epoch": 0.94, "learning_rate": 1.6035837486103617e-05, "loss": 0.5185, "step": 1463 }, { "epoch": 0.95, "learning_rate": 1.6030274760324163e-05, "loss": 0.5145, "step": 1464 }, { "epoch": 0.95, "learning_rate": 1.602470910068012e-05, "loss": 0.4996, "step": 1465 }, { "epoch": 0.95, "learning_rate": 1.6019140509879312e-05, "loss": 0.5129, "step": 1466 }, { "epoch": 0.95, "learning_rate": 1.6013568990630986e-05, "loss": 0.493, "step": 1467 }, { "epoch": 0.95, "learning_rate": 1.6007994545645807e-05, "loss": 0.494, "step": 1468 }, { "epoch": 0.95, "learning_rate": 1.6002417177635868e-05, "loss": 0.5111, "step": 1469 }, { "epoch": 0.95, "learning_rate": 1.599683688931469e-05, "loss": 0.5222, "step": 1470 }, { "epoch": 0.95, "learning_rate": 1.5991253683397205e-05, "loss": 0.5072, "step": 1471 }, { "epoch": 0.95, "learning_rate": 1.598566756259977e-05, "loss": 0.4955, "step": 1472 }, { "epoch": 0.95, "learning_rate": 1.598007852964015e-05, "loss": 0.5214, "step": 1473 }, { "epoch": 0.95, "learning_rate": 1.5974486587237554e-05, "loss": 0.5053, "step": 1474 }, { "epoch": 0.95, "learning_rate": 1.596889173811257e-05, "loss": 0.501, "step": 1475 }, { "epoch": 0.95, "learning_rate": 1.596329398498723e-05, "loss": 0.5176, "step": 1476 }, { "epoch": 0.95, "learning_rate": 1.5957693330584957e-05, "loss": 0.5073, "step": 1477 }, { "epoch": 0.95, "learning_rate": 1.5952089777630604e-05, "loss": 0.5094, "step": 1478 }, { "epoch": 0.95, "learning_rate": 1.5946483328850426e-05, "loss": 0.5098, "step": 1479 }, { "epoch": 0.96, "learning_rate": 1.5940873986972078e-05, "loss": 0.5364, "step": 1480 }, { "epoch": 0.96, "learning_rate": 1.593526175472464e-05, "loss": 0.4954, "step": 1481 }, { "epoch": 0.96, "learning_rate": 1.5929646634838583e-05, "loss": 0.4943, "step": 1482 }, { "epoch": 0.96, "learning_rate": 1.59240286300458e-05, "loss": 0.5158, "step": 1483 }, { "epoch": 0.96, "learning_rate": 1.5918407743079564e-05, "loss": 0.5005, "step": 1484 }, { "epoch": 0.96, "learning_rate": 1.591278397667457e-05, "loss": 0.5145, "step": 1485 }, { "epoch": 0.96, "learning_rate": 1.59071573335669e-05, "loss": 0.5202, "step": 1486 }, { "epoch": 0.96, "learning_rate": 1.5901527816494056e-05, "loss": 0.4848, "step": 1487 }, { "epoch": 0.96, "learning_rate": 1.5895895428194915e-05, "loss": 0.4877, "step": 1488 }, { "epoch": 0.96, "learning_rate": 1.5890260171409765e-05, "loss": 0.5099, "step": 1489 }, { "epoch": 0.96, "learning_rate": 1.5884622048880283e-05, "loss": 0.5028, "step": 1490 }, { "epoch": 0.96, "learning_rate": 1.5878981063349543e-05, "loss": 0.5397, "step": 1491 }, { "epoch": 0.96, "learning_rate": 1.5873337217562012e-05, "loss": 0.5019, "step": 1492 }, { "epoch": 0.96, "learning_rate": 1.5867690514263548e-05, "loss": 0.5302, "step": 1493 }, { "epoch": 0.96, "learning_rate": 1.58620409562014e-05, "loss": 0.4937, "step": 1494 }, { "epoch": 0.97, "learning_rate": 1.5856388546124208e-05, "loss": 0.5006, "step": 1495 }, { "epoch": 0.97, "learning_rate": 1.585073328678199e-05, "loss": 0.5192, "step": 1496 }, { "epoch": 0.97, "learning_rate": 1.5845075180926163e-05, "loss": 0.4996, "step": 1497 }, { "epoch": 0.97, "learning_rate": 1.583941423130952e-05, "loss": 0.5166, "step": 1498 }, { "epoch": 0.97, "learning_rate": 1.5833750440686242e-05, "loss": 0.487, "step": 1499 }, { "epoch": 0.97, "learning_rate": 1.582808381181189e-05, "loss": 0.4884, "step": 1500 }, { "epoch": 0.97, "learning_rate": 1.582241434744341e-05, "loss": 0.5056, "step": 1501 }, { "epoch": 0.97, "learning_rate": 1.581674205033912e-05, "loss": 0.5273, "step": 1502 }, { "epoch": 0.97, "learning_rate": 1.5811066923258718e-05, "loss": 0.5266, "step": 1503 }, { "epoch": 0.97, "learning_rate": 1.5805388968963286e-05, "loss": 0.5133, "step": 1504 }, { "epoch": 0.97, "learning_rate": 1.579970819021528e-05, "loss": 0.5338, "step": 1505 }, { "epoch": 0.97, "learning_rate": 1.5794024589778518e-05, "loss": 0.5346, "step": 1506 }, { "epoch": 0.97, "learning_rate": 1.5788338170418208e-05, "loss": 0.5348, "step": 1507 }, { "epoch": 0.97, "learning_rate": 1.5782648934900915e-05, "loss": 0.489, "step": 1508 }, { "epoch": 0.97, "learning_rate": 1.5776956885994584e-05, "loss": 0.5334, "step": 1509 }, { "epoch": 0.97, "learning_rate": 1.577126202646852e-05, "loss": 0.499, "step": 1510 }, { "epoch": 0.98, "learning_rate": 1.576556435909341e-05, "loss": 0.4841, "step": 1511 }, { "epoch": 0.98, "learning_rate": 1.575986388664129e-05, "loss": 0.5462, "step": 1512 }, { "epoch": 0.98, "learning_rate": 1.5754160611885566e-05, "loss": 0.479, "step": 1513 }, { "epoch": 0.98, "learning_rate": 1.574845453760102e-05, "loss": 0.5198, "step": 1514 }, { "epoch": 0.98, "learning_rate": 1.574274566656378e-05, "loss": 0.5182, "step": 1515 }, { "epoch": 0.98, "learning_rate": 1.5737034001551336e-05, "loss": 0.5035, "step": 1516 }, { "epoch": 0.98, "learning_rate": 1.573131954534255e-05, "loss": 0.5155, "step": 1517 }, { "epoch": 0.98, "learning_rate": 1.5725602300717628e-05, "loss": 0.5047, "step": 1518 }, { "epoch": 0.98, "learning_rate": 1.5719882270458138e-05, "loss": 0.5021, "step": 1519 }, { "epoch": 0.98, "learning_rate": 1.5714159457347007e-05, "loss": 0.5158, "step": 1520 }, { "epoch": 0.98, "learning_rate": 1.5708433864168512e-05, "loss": 0.5096, "step": 1521 }, { "epoch": 0.98, "learning_rate": 1.5702705493708283e-05, "loss": 0.5298, "step": 1522 }, { "epoch": 0.98, "learning_rate": 1.5696974348753298e-05, "loss": 0.5062, "step": 1523 }, { "epoch": 0.98, "learning_rate": 1.5691240432091892e-05, "loss": 0.4926, "step": 1524 }, { "epoch": 0.98, "learning_rate": 1.5685503746513744e-05, "loss": 0.5314, "step": 1525 }, { "epoch": 0.99, "learning_rate": 1.5679764294809882e-05, "loss": 0.5064, "step": 1526 }, { "epoch": 0.99, "learning_rate": 1.5674022079772675e-05, "loss": 0.5277, "step": 1527 }, { "epoch": 0.99, "learning_rate": 1.566827710419584e-05, "loss": 0.507, "step": 1528 }, { "epoch": 0.99, "learning_rate": 1.5662529370874444e-05, "loss": 0.48, "step": 1529 }, { "epoch": 0.99, "learning_rate": 1.565677888260488e-05, "loss": 0.5373, "step": 1530 }, { "epoch": 0.99, "learning_rate": 1.5651025642184898e-05, "loss": 0.5091, "step": 1531 }, { "epoch": 0.99, "learning_rate": 1.5645269652413574e-05, "loss": 0.5269, "step": 1532 }, { "epoch": 0.99, "learning_rate": 1.5639510916091326e-05, "loss": 0.5, "step": 1533 }, { "epoch": 0.99, "learning_rate": 1.5633749436019913e-05, "loss": 0.5446, "step": 1534 }, { "epoch": 0.99, "learning_rate": 1.5627985215002423e-05, "loss": 0.4965, "step": 1535 }, { "epoch": 0.99, "learning_rate": 1.5622218255843276e-05, "loss": 0.5218, "step": 1536 }, { "epoch": 0.99, "learning_rate": 1.5616448561348233e-05, "loss": 0.5086, "step": 1537 }, { "epoch": 0.99, "learning_rate": 1.561067613432438e-05, "loss": 0.507, "step": 1538 }, { "epoch": 0.99, "learning_rate": 1.5604900977580132e-05, "loss": 0.4896, "step": 1539 }, { "epoch": 0.99, "learning_rate": 1.559912309392523e-05, "loss": 0.5189, "step": 1540 }, { "epoch": 0.99, "learning_rate": 1.559334248617075e-05, "loss": 0.5107, "step": 1541 }, { "epoch": 1.0, "learning_rate": 1.5587559157129078e-05, "loss": 0.5429, "step": 1542 }, { "epoch": 1.0, "learning_rate": 1.5581773109613947e-05, "loss": 0.5176, "step": 1543 }, { "epoch": 1.0, "learning_rate": 1.5575984346440393e-05, "loss": 0.505, "step": 1544 }, { "epoch": 1.0, "learning_rate": 1.5570192870424775e-05, "loss": 0.5305, "step": 1545 }, { "epoch": 1.0, "learning_rate": 1.5564398684384787e-05, "loss": 0.5323, "step": 1546 }, { "epoch": 1.0, "learning_rate": 1.555860179113942e-05, "loss": 0.5274, "step": 1547 }, { "epoch": 1.0, "learning_rate": 1.5552802193509003e-05, "loss": 0.504, "step": 1548 }, { "epoch": 1.0, "learning_rate": 1.5546999894315166e-05, "loss": 0.5059, "step": 1549 }, { "epoch": 1.0, "learning_rate": 1.5541194896380863e-05, "loss": 0.4478, "step": 1550 }, { "epoch": 1.0, "learning_rate": 1.5535387202530346e-05, "loss": 0.4446, "step": 1551 }, { "epoch": 1.0, "learning_rate": 1.55295768155892e-05, "loss": 0.45, "step": 1552 }, { "epoch": 1.0, "learning_rate": 1.5523763738384303e-05, "loss": 0.4117, "step": 1553 }, { "epoch": 1.0, "learning_rate": 1.551794797374385e-05, "loss": 0.4297, "step": 1554 }, { "epoch": 1.0, "learning_rate": 1.551212952449734e-05, "loss": 0.4501, "step": 1555 }, { "epoch": 1.0, "learning_rate": 1.5506308393475582e-05, "loss": 0.41, "step": 1556 }, { "epoch": 1.01, "learning_rate": 1.5500484583510685e-05, "loss": 0.4113, "step": 1557 }, { "epoch": 1.01, "learning_rate": 1.549465809743607e-05, "loss": 0.4185, "step": 1558 }, { "epoch": 1.01, "learning_rate": 1.5488828938086444e-05, "loss": 0.4531, "step": 1559 }, { "epoch": 1.01, "learning_rate": 1.5482997108297834e-05, "loss": 0.4381, "step": 1560 }, { "epoch": 1.01, "learning_rate": 1.547716261090755e-05, "loss": 0.4539, "step": 1561 }, { "epoch": 1.01, "learning_rate": 1.5471325448754207e-05, "loss": 0.4189, "step": 1562 }, { "epoch": 1.01, "learning_rate": 1.546548562467772e-05, "loss": 0.4226, "step": 1563 }, { "epoch": 1.01, "learning_rate": 1.545964314151929e-05, "loss": 0.4294, "step": 1564 }, { "epoch": 1.01, "learning_rate": 1.545379800212142e-05, "loss": 0.4324, "step": 1565 }, { "epoch": 1.01, "learning_rate": 1.5447950209327905e-05, "loss": 0.4274, "step": 1566 }, { "epoch": 1.01, "learning_rate": 1.544209976598382e-05, "loss": 0.4211, "step": 1567 }, { "epoch": 1.01, "learning_rate": 1.5436246674935543e-05, "loss": 0.4201, "step": 1568 }, { "epoch": 1.01, "learning_rate": 1.5430390939030734e-05, "loss": 0.4164, "step": 1569 }, { "epoch": 1.01, "learning_rate": 1.542453256111834e-05, "loss": 0.3999, "step": 1570 }, { "epoch": 1.01, "learning_rate": 1.5418671544048597e-05, "loss": 0.4034, "step": 1571 }, { "epoch": 1.01, "learning_rate": 1.5412807890673015e-05, "loss": 0.4327, "step": 1572 }, { "epoch": 1.02, "learning_rate": 1.54069416038444e-05, "loss": 0.4145, "step": 1573 }, { "epoch": 1.02, "learning_rate": 1.5401072686416826e-05, "loss": 0.4092, "step": 1574 }, { "epoch": 1.02, "learning_rate": 1.5395201141245662e-05, "loss": 0.4275, "step": 1575 }, { "epoch": 1.02, "learning_rate": 1.5389326971187543e-05, "loss": 0.4166, "step": 1576 }, { "epoch": 1.02, "learning_rate": 1.538345017910038e-05, "loss": 0.415, "step": 1577 }, { "epoch": 1.02, "learning_rate": 1.5377570767843377e-05, "loss": 0.4251, "step": 1578 }, { "epoch": 1.02, "learning_rate": 1.5371688740276988e-05, "loss": 0.4235, "step": 1579 }, { "epoch": 1.02, "learning_rate": 1.536580409926296e-05, "loss": 0.4399, "step": 1580 }, { "epoch": 1.02, "learning_rate": 1.53599168476643e-05, "loss": 0.4129, "step": 1581 }, { "epoch": 1.02, "learning_rate": 1.5354026988345284e-05, "loss": 0.4214, "step": 1582 }, { "epoch": 1.02, "learning_rate": 1.534813452417147e-05, "loss": 0.4435, "step": 1583 }, { "epoch": 1.02, "learning_rate": 1.5342239458009675e-05, "loss": 0.4283, "step": 1584 }, { "epoch": 1.02, "learning_rate": 1.533634179272797e-05, "loss": 0.4392, "step": 1585 }, { "epoch": 1.02, "learning_rate": 1.5330441531195714e-05, "loss": 0.4045, "step": 1586 }, { "epoch": 1.02, "learning_rate": 1.5324538676283512e-05, "loss": 0.4215, "step": 1587 }, { "epoch": 1.03, "learning_rate": 1.5318633230863237e-05, "loss": 0.4385, "step": 1588 }, { "epoch": 1.03, "learning_rate": 1.531272519780802e-05, "loss": 0.4292, "step": 1589 }, { "epoch": 1.03, "learning_rate": 1.5306814579992254e-05, "loss": 0.4234, "step": 1590 }, { "epoch": 1.03, "learning_rate": 1.5300901380291583e-05, "loss": 0.4342, "step": 1591 }, { "epoch": 1.03, "learning_rate": 1.5294985601582922e-05, "loss": 0.4395, "step": 1592 }, { "epoch": 1.03, "learning_rate": 1.5289067246744423e-05, "loss": 0.4022, "step": 1593 }, { "epoch": 1.03, "learning_rate": 1.52831463186555e-05, "loss": 0.4138, "step": 1594 }, { "epoch": 1.03, "learning_rate": 1.5277222820196816e-05, "loss": 0.4228, "step": 1595 }, { "epoch": 1.03, "learning_rate": 1.5271296754250296e-05, "loss": 0.4325, "step": 1596 }, { "epoch": 1.03, "learning_rate": 1.5265368123699093e-05, "loss": 0.4096, "step": 1597 }, { "epoch": 1.03, "learning_rate": 1.5259436931427624e-05, "loss": 0.4184, "step": 1598 }, { "epoch": 1.03, "learning_rate": 1.525350318032155e-05, "loss": 0.4267, "step": 1599 }, { "epoch": 1.03, "learning_rate": 1.524756687326777e-05, "loss": 0.4248, "step": 1600 }, { "epoch": 1.03, "learning_rate": 1.5241628013154435e-05, "loss": 0.4172, "step": 1601 }, { "epoch": 1.03, "learning_rate": 1.5235686602870932e-05, "loss": 0.4086, "step": 1602 }, { "epoch": 1.03, "learning_rate": 1.5229742645307894e-05, "loss": 0.4395, "step": 1603 }, { "epoch": 1.04, "learning_rate": 1.5223796143357188e-05, "loss": 0.4212, "step": 1604 }, { "epoch": 1.04, "learning_rate": 1.5217847099911919e-05, "loss": 0.4489, "step": 1605 }, { "epoch": 1.04, "learning_rate": 1.5211895517866437e-05, "loss": 0.4199, "step": 1606 }, { "epoch": 1.04, "learning_rate": 1.5205941400116316e-05, "loss": 0.4257, "step": 1607 }, { "epoch": 1.04, "learning_rate": 1.5199984749558367e-05, "loss": 0.4331, "step": 1608 }, { "epoch": 1.04, "learning_rate": 1.5194025569090638e-05, "loss": 0.4525, "step": 1609 }, { "epoch": 1.04, "learning_rate": 1.5188063861612405e-05, "loss": 0.4509, "step": 1610 }, { "epoch": 1.04, "learning_rate": 1.5182099630024172e-05, "loss": 0.4287, "step": 1611 }, { "epoch": 1.04, "learning_rate": 1.5176132877227674e-05, "loss": 0.4517, "step": 1612 }, { "epoch": 1.04, "learning_rate": 1.5170163606125867e-05, "loss": 0.4353, "step": 1613 }, { "epoch": 1.04, "learning_rate": 1.5164191819622937e-05, "loss": 0.4318, "step": 1614 }, { "epoch": 1.04, "learning_rate": 1.5158217520624296e-05, "loss": 0.422, "step": 1615 }, { "epoch": 1.04, "learning_rate": 1.5152240712036573e-05, "loss": 0.4314, "step": 1616 }, { "epoch": 1.04, "learning_rate": 1.5146261396767615e-05, "loss": 0.4422, "step": 1617 }, { "epoch": 1.04, "learning_rate": 1.51402795777265e-05, "loss": 0.3957, "step": 1618 }, { "epoch": 1.05, "learning_rate": 1.5134295257823519e-05, "loss": 0.4189, "step": 1619 }, { "epoch": 1.05, "learning_rate": 1.5128308439970174e-05, "loss": 0.4223, "step": 1620 }, { "epoch": 1.05, "learning_rate": 1.5122319127079185e-05, "loss": 0.4255, "step": 1621 }, { "epoch": 1.05, "learning_rate": 1.5116327322064497e-05, "loss": 0.4187, "step": 1622 }, { "epoch": 1.05, "learning_rate": 1.5110333027841248e-05, "loss": 0.4265, "step": 1623 }, { "epoch": 1.05, "learning_rate": 1.5104336247325803e-05, "loss": 0.4195, "step": 1624 }, { "epoch": 1.05, "learning_rate": 1.5098336983435729e-05, "loss": 0.4096, "step": 1625 }, { "epoch": 1.05, "learning_rate": 1.5092335239089803e-05, "loss": 0.4335, "step": 1626 }, { "epoch": 1.05, "learning_rate": 1.5086331017208011e-05, "loss": 0.4308, "step": 1627 }, { "epoch": 1.05, "learning_rate": 1.5080324320711542e-05, "loss": 0.4553, "step": 1628 }, { "epoch": 1.05, "learning_rate": 1.5074315152522787e-05, "loss": 0.4162, "step": 1629 }, { "epoch": 1.05, "learning_rate": 1.506830351556534e-05, "loss": 0.43, "step": 1630 }, { "epoch": 1.05, "learning_rate": 1.5062289412764003e-05, "loss": 0.4262, "step": 1631 }, { "epoch": 1.05, "learning_rate": 1.505627284704477e-05, "loss": 0.4305, "step": 1632 }, { "epoch": 1.05, "learning_rate": 1.5050253821334837e-05, "loss": 0.4365, "step": 1633 }, { "epoch": 1.05, "learning_rate": 1.504423233856259e-05, "loss": 0.4106, "step": 1634 }, { "epoch": 1.06, "learning_rate": 1.5038208401657626e-05, "loss": 0.4323, "step": 1635 }, { "epoch": 1.06, "learning_rate": 1.5032182013550719e-05, "loss": 0.4318, "step": 1636 }, { "epoch": 1.06, "learning_rate": 1.5026153177173841e-05, "loss": 0.4331, "step": 1637 }, { "epoch": 1.06, "learning_rate": 1.5020121895460165e-05, "loss": 0.4387, "step": 1638 }, { "epoch": 1.06, "learning_rate": 1.501408817134404e-05, "loss": 0.4137, "step": 1639 }, { "epoch": 1.06, "learning_rate": 1.5008052007761009e-05, "loss": 0.4333, "step": 1640 }, { "epoch": 1.06, "learning_rate": 1.5002013407647801e-05, "loss": 0.4017, "step": 1641 }, { "epoch": 1.06, "learning_rate": 1.4995972373942334e-05, "loss": 0.4188, "step": 1642 }, { "epoch": 1.06, "learning_rate": 1.4989928909583701e-05, "loss": 0.4387, "step": 1643 }, { "epoch": 1.06, "learning_rate": 1.498388301751219e-05, "loss": 0.435, "step": 1644 }, { "epoch": 1.06, "learning_rate": 1.4977834700669265e-05, "loss": 0.4283, "step": 1645 }, { "epoch": 1.06, "learning_rate": 1.4971783961997561e-05, "loss": 0.4484, "step": 1646 }, { "epoch": 1.06, "learning_rate": 1.4965730804440905e-05, "loss": 0.4497, "step": 1647 }, { "epoch": 1.06, "learning_rate": 1.495967523094429e-05, "loss": 0.4303, "step": 1648 }, { "epoch": 1.06, "learning_rate": 1.4953617244453895e-05, "loss": 0.4389, "step": 1649 }, { "epoch": 1.07, "learning_rate": 1.4947556847917062e-05, "loss": 0.4151, "step": 1650 }, { "epoch": 1.07, "learning_rate": 1.4941494044282313e-05, "loss": 0.4287, "step": 1651 }, { "epoch": 1.07, "learning_rate": 1.4935428836499333e-05, "loss": 0.4211, "step": 1652 }, { "epoch": 1.07, "learning_rate": 1.4929361227518989e-05, "loss": 0.4279, "step": 1653 }, { "epoch": 1.07, "learning_rate": 1.4923291220293307e-05, "loss": 0.4511, "step": 1654 }, { "epoch": 1.07, "learning_rate": 1.4917218817775485e-05, "loss": 0.4054, "step": 1655 }, { "epoch": 1.07, "learning_rate": 1.4911144022919879e-05, "loss": 0.4259, "step": 1656 }, { "epoch": 1.07, "learning_rate": 1.4905066838682017e-05, "loss": 0.4055, "step": 1657 }, { "epoch": 1.07, "learning_rate": 1.4898987268018586e-05, "loss": 0.3893, "step": 1658 }, { "epoch": 1.07, "learning_rate": 1.4892905313887432e-05, "loss": 0.4126, "step": 1659 }, { "epoch": 1.07, "learning_rate": 1.4886820979247561e-05, "loss": 0.4176, "step": 1660 }, { "epoch": 1.07, "learning_rate": 1.4880734267059142e-05, "loss": 0.4571, "step": 1661 }, { "epoch": 1.07, "learning_rate": 1.48746451802835e-05, "loss": 0.4408, "step": 1662 }, { "epoch": 1.07, "learning_rate": 1.4868553721883113e-05, "loss": 0.4488, "step": 1663 }, { "epoch": 1.07, "learning_rate": 1.4862459894821606e-05, "loss": 0.4455, "step": 1664 }, { "epoch": 1.07, "learning_rate": 1.485636370206377e-05, "loss": 0.4229, "step": 1665 }, { "epoch": 1.08, "learning_rate": 1.4850265146575535e-05, "loss": 0.4197, "step": 1666 }, { "epoch": 1.08, "learning_rate": 1.4844164231323992e-05, "loss": 0.4417, "step": 1667 }, { "epoch": 1.08, "learning_rate": 1.483806095927737e-05, "loss": 0.4093, "step": 1668 }, { "epoch": 1.08, "learning_rate": 1.4831955333405044e-05, "loss": 0.4296, "step": 1669 }, { "epoch": 1.08, "learning_rate": 1.4825847356677546e-05, "loss": 0.4231, "step": 1670 }, { "epoch": 1.08, "learning_rate": 1.481973703206654e-05, "loss": 0.43, "step": 1671 }, { "epoch": 1.08, "learning_rate": 1.481362436254484e-05, "loss": 0.4271, "step": 1672 }, { "epoch": 1.08, "learning_rate": 1.4807509351086395e-05, "loss": 0.4426, "step": 1673 }, { "epoch": 1.08, "learning_rate": 1.4801392000666297e-05, "loss": 0.4248, "step": 1674 }, { "epoch": 1.08, "learning_rate": 1.4795272314260776e-05, "loss": 0.421, "step": 1675 }, { "epoch": 1.08, "learning_rate": 1.4789150294847192e-05, "loss": 0.4428, "step": 1676 }, { "epoch": 1.08, "learning_rate": 1.4783025945404053e-05, "loss": 0.4245, "step": 1677 }, { "epoch": 1.08, "learning_rate": 1.4776899268910985e-05, "loss": 0.4227, "step": 1678 }, { "epoch": 1.08, "learning_rate": 1.477077026834876e-05, "loss": 0.4246, "step": 1679 }, { "epoch": 1.08, "learning_rate": 1.4764638946699275e-05, "loss": 0.4097, "step": 1680 }, { "epoch": 1.09, "learning_rate": 1.4758505306945551e-05, "loss": 0.4201, "step": 1681 }, { "epoch": 1.09, "learning_rate": 1.475236935207175e-05, "loss": 0.4363, "step": 1682 }, { "epoch": 1.09, "learning_rate": 1.4746231085063146e-05, "loss": 0.4354, "step": 1683 }, { "epoch": 1.09, "learning_rate": 1.4740090508906147e-05, "loss": 0.4017, "step": 1684 }, { "epoch": 1.09, "learning_rate": 1.4733947626588279e-05, "loss": 0.4261, "step": 1685 }, { "epoch": 1.09, "learning_rate": 1.4727802441098193e-05, "loss": 0.4361, "step": 1686 }, { "epoch": 1.09, "learning_rate": 1.4721654955425662e-05, "loss": 0.4315, "step": 1687 }, { "epoch": 1.09, "learning_rate": 1.4715505172561577e-05, "loss": 0.4531, "step": 1688 }, { "epoch": 1.09, "learning_rate": 1.4709353095497947e-05, "loss": 0.4449, "step": 1689 }, { "epoch": 1.09, "learning_rate": 1.4703198727227892e-05, "loss": 0.4278, "step": 1690 }, { "epoch": 1.09, "learning_rate": 1.4697042070745654e-05, "loss": 0.4389, "step": 1691 }, { "epoch": 1.09, "learning_rate": 1.4690883129046585e-05, "loss": 0.4188, "step": 1692 }, { "epoch": 1.09, "learning_rate": 1.4684721905127147e-05, "loss": 0.4251, "step": 1693 }, { "epoch": 1.09, "learning_rate": 1.4678558401984915e-05, "loss": 0.4103, "step": 1694 }, { "epoch": 1.09, "learning_rate": 1.4672392622618571e-05, "loss": 0.4007, "step": 1695 }, { "epoch": 1.09, "learning_rate": 1.466622457002791e-05, "loss": 0.3978, "step": 1696 }, { "epoch": 1.1, "learning_rate": 1.4660054247213826e-05, "loss": 0.4144, "step": 1697 }, { "epoch": 1.1, "learning_rate": 1.4653881657178317e-05, "loss": 0.428, "step": 1698 }, { "epoch": 1.1, "learning_rate": 1.4647706802924493e-05, "loss": 0.4441, "step": 1699 }, { "epoch": 1.1, "learning_rate": 1.4641529687456558e-05, "loss": 0.3971, "step": 1700 }, { "epoch": 1.1, "learning_rate": 1.4635350313779815e-05, "loss": 0.4287, "step": 1701 }, { "epoch": 1.1, "learning_rate": 1.462916868490067e-05, "loss": 0.4084, "step": 1702 }, { "epoch": 1.1, "learning_rate": 1.4622984803826625e-05, "loss": 0.4152, "step": 1703 }, { "epoch": 1.1, "learning_rate": 1.4616798673566276e-05, "loss": 0.4461, "step": 1704 }, { "epoch": 1.1, "learning_rate": 1.461061029712932e-05, "loss": 0.431, "step": 1705 }, { "epoch": 1.1, "learning_rate": 1.4604419677526536e-05, "loss": 0.4169, "step": 1706 }, { "epoch": 1.1, "learning_rate": 1.4598226817769803e-05, "loss": 0.4247, "step": 1707 }, { "epoch": 1.1, "learning_rate": 1.4592031720872086e-05, "loss": 0.4144, "step": 1708 }, { "epoch": 1.1, "learning_rate": 1.4585834389847439e-05, "loss": 0.4397, "step": 1709 }, { "epoch": 1.1, "learning_rate": 1.4579634827711004e-05, "loss": 0.4234, "step": 1710 }, { "epoch": 1.1, "learning_rate": 1.4573433037479006e-05, "loss": 0.4144, "step": 1711 }, { "epoch": 1.11, "learning_rate": 1.4567229022168756e-05, "loss": 0.4489, "step": 1712 }, { "epoch": 1.11, "learning_rate": 1.4561022784798648e-05, "loss": 0.4332, "step": 1713 }, { "epoch": 1.11, "learning_rate": 1.4554814328388158e-05, "loss": 0.4347, "step": 1714 }, { "epoch": 1.11, "learning_rate": 1.4548603655957839e-05, "loss": 0.4232, "step": 1715 }, { "epoch": 1.11, "learning_rate": 1.454239077052932e-05, "loss": 0.4221, "step": 1716 }, { "epoch": 1.11, "learning_rate": 1.4536175675125315e-05, "loss": 0.412, "step": 1717 }, { "epoch": 1.11, "learning_rate": 1.4529958372769603e-05, "loss": 0.4394, "step": 1718 }, { "epoch": 1.11, "learning_rate": 1.4523738866487048e-05, "loss": 0.4208, "step": 1719 }, { "epoch": 1.11, "learning_rate": 1.4517517159303573e-05, "loss": 0.4212, "step": 1720 }, { "epoch": 1.11, "learning_rate": 1.4511293254246179e-05, "loss": 0.4202, "step": 1721 }, { "epoch": 1.11, "learning_rate": 1.4505067154342944e-05, "loss": 0.4165, "step": 1722 }, { "epoch": 1.11, "learning_rate": 1.4498838862622997e-05, "loss": 0.4459, "step": 1723 }, { "epoch": 1.11, "learning_rate": 1.4492608382116548e-05, "loss": 0.436, "step": 1724 }, { "epoch": 1.11, "learning_rate": 1.4486375715854866e-05, "loss": 0.4473, "step": 1725 }, { "epoch": 1.11, "learning_rate": 1.4480140866870281e-05, "loss": 0.4286, "step": 1726 }, { "epoch": 1.12, "learning_rate": 1.4473903838196188e-05, "loss": 0.4274, "step": 1727 }, { "epoch": 1.12, "learning_rate": 1.4467664632867042e-05, "loss": 0.4466, "step": 1728 }, { "epoch": 1.12, "learning_rate": 1.4461423253918358e-05, "loss": 0.4257, "step": 1729 }, { "epoch": 1.12, "learning_rate": 1.4455179704386706e-05, "loss": 0.4555, "step": 1730 }, { "epoch": 1.12, "learning_rate": 1.4448933987309715e-05, "loss": 0.4351, "step": 1731 }, { "epoch": 1.12, "learning_rate": 1.4442686105726066e-05, "loss": 0.4299, "step": 1732 }, { "epoch": 1.12, "learning_rate": 1.4436436062675498e-05, "loss": 0.4439, "step": 1733 }, { "epoch": 1.12, "learning_rate": 1.4430183861198792e-05, "loss": 0.4385, "step": 1734 }, { "epoch": 1.12, "learning_rate": 1.442392950433779e-05, "loss": 0.4536, "step": 1735 }, { "epoch": 1.12, "learning_rate": 1.4417672995135372e-05, "loss": 0.4551, "step": 1736 }, { "epoch": 1.12, "learning_rate": 1.4411414336635476e-05, "loss": 0.4175, "step": 1737 }, { "epoch": 1.12, "learning_rate": 1.440515353188308e-05, "loss": 0.4151, "step": 1738 }, { "epoch": 1.12, "learning_rate": 1.4398890583924202e-05, "loss": 0.4208, "step": 1739 }, { "epoch": 1.12, "learning_rate": 1.4392625495805913e-05, "loss": 0.4148, "step": 1740 }, { "epoch": 1.12, "learning_rate": 1.4386358270576315e-05, "loss": 0.421, "step": 1741 }, { "epoch": 1.12, "learning_rate": 1.4380088911284557e-05, "loss": 0.4215, "step": 1742 }, { "epoch": 1.13, "learning_rate": 1.4373817420980824e-05, "loss": 0.4091, "step": 1743 }, { "epoch": 1.13, "learning_rate": 1.4367543802716334e-05, "loss": 0.4236, "step": 1744 }, { "epoch": 1.13, "learning_rate": 1.4361268059543347e-05, "loss": 0.4137, "step": 1745 }, { "epoch": 1.13, "learning_rate": 1.4354990194515155e-05, "loss": 0.4419, "step": 1746 }, { "epoch": 1.13, "learning_rate": 1.4348710210686074e-05, "loss": 0.4236, "step": 1747 }, { "epoch": 1.13, "learning_rate": 1.4342428111111461e-05, "loss": 0.4119, "step": 1748 }, { "epoch": 1.13, "learning_rate": 1.4336143898847705e-05, "loss": 0.4058, "step": 1749 }, { "epoch": 1.13, "learning_rate": 1.4329857576952212e-05, "loss": 0.4165, "step": 1750 }, { "epoch": 1.13, "learning_rate": 1.4323569148483417e-05, "loss": 0.4505, "step": 1751 }, { "epoch": 1.13, "learning_rate": 1.4317278616500785e-05, "loss": 0.4335, "step": 1752 }, { "epoch": 1.13, "learning_rate": 1.43109859840648e-05, "loss": 0.4254, "step": 1753 }, { "epoch": 1.13, "learning_rate": 1.430469125423697e-05, "loss": 0.4323, "step": 1754 }, { "epoch": 1.13, "learning_rate": 1.4298394430079826e-05, "loss": 0.4393, "step": 1755 }, { "epoch": 1.13, "learning_rate": 1.4292095514656907e-05, "loss": 0.4096, "step": 1756 }, { "epoch": 1.13, "learning_rate": 1.4285794511032788e-05, "loss": 0.4137, "step": 1757 }, { "epoch": 1.14, "learning_rate": 1.4279491422273043e-05, "loss": 0.43, "step": 1758 }, { "epoch": 1.14, "learning_rate": 1.4273186251444267e-05, "loss": 0.4446, "step": 1759 }, { "epoch": 1.14, "learning_rate": 1.4266879001614067e-05, "loss": 0.4163, "step": 1760 }, { "epoch": 1.14, "learning_rate": 1.4260569675851064e-05, "loss": 0.4288, "step": 1761 }, { "epoch": 1.14, "learning_rate": 1.4254258277224888e-05, "loss": 0.4148, "step": 1762 }, { "epoch": 1.14, "learning_rate": 1.4247944808806176e-05, "loss": 0.4248, "step": 1763 }, { "epoch": 1.14, "learning_rate": 1.424162927366657e-05, "loss": 0.428, "step": 1764 }, { "epoch": 1.14, "learning_rate": 1.4235311674878723e-05, "loss": 0.432, "step": 1765 }, { "epoch": 1.14, "learning_rate": 1.4228992015516287e-05, "loss": 0.422, "step": 1766 }, { "epoch": 1.14, "learning_rate": 1.4222670298653922e-05, "loss": 0.4355, "step": 1767 }, { "epoch": 1.14, "learning_rate": 1.4216346527367284e-05, "loss": 0.4433, "step": 1768 }, { "epoch": 1.14, "learning_rate": 1.4210020704733033e-05, "loss": 0.416, "step": 1769 }, { "epoch": 1.14, "learning_rate": 1.4203692833828817e-05, "loss": 0.4416, "step": 1770 }, { "epoch": 1.14, "learning_rate": 1.4197362917733295e-05, "loss": 0.4247, "step": 1771 }, { "epoch": 1.14, "learning_rate": 1.4191030959526106e-05, "loss": 0.4045, "step": 1772 }, { "epoch": 1.14, "learning_rate": 1.4184696962287901e-05, "loss": 0.4213, "step": 1773 }, { "epoch": 1.15, "learning_rate": 1.4178360929100303e-05, "loss": 0.4494, "step": 1774 }, { "epoch": 1.15, "learning_rate": 1.4172022863045942e-05, "loss": 0.45, "step": 1775 }, { "epoch": 1.15, "learning_rate": 1.4165682767208426e-05, "loss": 0.4267, "step": 1776 }, { "epoch": 1.15, "learning_rate": 1.4159340644672358e-05, "loss": 0.4263, "step": 1777 }, { "epoch": 1.15, "learning_rate": 1.4152996498523317e-05, "loss": 0.456, "step": 1778 }, { "epoch": 1.15, "learning_rate": 1.4146650331847879e-05, "loss": 0.4182, "step": 1779 }, { "epoch": 1.15, "learning_rate": 1.4140302147733596e-05, "loss": 0.4237, "step": 1780 }, { "epoch": 1.15, "learning_rate": 1.4133951949269e-05, "loss": 0.4633, "step": 1781 }, { "epoch": 1.15, "learning_rate": 1.4127599739543606e-05, "loss": 0.4266, "step": 1782 }, { "epoch": 1.15, "learning_rate": 1.4121245521647913e-05, "loss": 0.444, "step": 1783 }, { "epoch": 1.15, "learning_rate": 1.4114889298673383e-05, "loss": 0.4167, "step": 1784 }, { "epoch": 1.15, "learning_rate": 1.4108531073712468e-05, "loss": 0.4189, "step": 1785 }, { "epoch": 1.15, "learning_rate": 1.4102170849858583e-05, "loss": 0.4451, "step": 1786 }, { "epoch": 1.15, "learning_rate": 1.4095808630206125e-05, "loss": 0.4512, "step": 1787 }, { "epoch": 1.15, "learning_rate": 1.4089444417850455e-05, "loss": 0.4174, "step": 1788 }, { "epoch": 1.16, "learning_rate": 1.4083078215887904e-05, "loss": 0.4143, "step": 1789 }, { "epoch": 1.16, "learning_rate": 1.4076710027415776e-05, "loss": 0.4251, "step": 1790 }, { "epoch": 1.16, "learning_rate": 1.4070339855532333e-05, "loss": 0.4487, "step": 1791 }, { "epoch": 1.16, "learning_rate": 1.4063967703336814e-05, "loss": 0.436, "step": 1792 }, { "epoch": 1.16, "learning_rate": 1.4057593573929413e-05, "loss": 0.4334, "step": 1793 }, { "epoch": 1.16, "learning_rate": 1.4051217470411284e-05, "loss": 0.434, "step": 1794 }, { "epoch": 1.16, "learning_rate": 1.4044839395884546e-05, "loss": 0.4387, "step": 1795 }, { "epoch": 1.16, "learning_rate": 1.403845935345228e-05, "loss": 0.441, "step": 1796 }, { "epoch": 1.16, "learning_rate": 1.4032077346218518e-05, "loss": 0.4367, "step": 1797 }, { "epoch": 1.16, "learning_rate": 1.4025693377288246e-05, "loss": 0.4298, "step": 1798 }, { "epoch": 1.16, "learning_rate": 1.4019307449767414e-05, "loss": 0.424, "step": 1799 }, { "epoch": 1.16, "learning_rate": 1.401291956676292e-05, "loss": 0.4249, "step": 1800 }, { "epoch": 1.16, "learning_rate": 1.4006529731382609e-05, "loss": 0.4078, "step": 1801 }, { "epoch": 1.16, "learning_rate": 1.4000137946735284e-05, "loss": 0.4224, "step": 1802 }, { "epoch": 1.16, "learning_rate": 1.3993744215930694e-05, "loss": 0.4473, "step": 1803 }, { "epoch": 1.16, "learning_rate": 1.3987348542079526e-05, "loss": 0.4017, "step": 1804 }, { "epoch": 1.17, "learning_rate": 1.3980950928293424e-05, "loss": 0.4287, "step": 1805 }, { "epoch": 1.17, "learning_rate": 1.397455137768497e-05, "loss": 0.4507, "step": 1806 }, { "epoch": 1.17, "learning_rate": 1.396814989336769e-05, "loss": 0.4349, "step": 1807 }, { "epoch": 1.17, "learning_rate": 1.396174647845605e-05, "loss": 0.4451, "step": 1808 }, { "epoch": 1.17, "learning_rate": 1.395534113606546e-05, "loss": 0.4325, "step": 1809 }, { "epoch": 1.17, "learning_rate": 1.3948933869312258e-05, "loss": 0.4233, "step": 1810 }, { "epoch": 1.17, "learning_rate": 1.3942524681313722e-05, "loss": 0.4138, "step": 1811 }, { "epoch": 1.17, "learning_rate": 1.3936113575188074e-05, "loss": 0.4304, "step": 1812 }, { "epoch": 1.17, "learning_rate": 1.3929700554054457e-05, "loss": 0.4239, "step": 1813 }, { "epoch": 1.17, "learning_rate": 1.392328562103295e-05, "loss": 0.4251, "step": 1814 }, { "epoch": 1.17, "learning_rate": 1.3916868779244563e-05, "loss": 0.4382, "step": 1815 }, { "epoch": 1.17, "learning_rate": 1.3910450031811235e-05, "loss": 0.4424, "step": 1816 }, { "epoch": 1.17, "learning_rate": 1.3904029381855831e-05, "loss": 0.4181, "step": 1817 }, { "epoch": 1.17, "learning_rate": 1.389760683250214e-05, "loss": 0.3904, "step": 1818 }, { "epoch": 1.17, "learning_rate": 1.389118238687488e-05, "loss": 0.4131, "step": 1819 }, { "epoch": 1.18, "learning_rate": 1.3884756048099688e-05, "loss": 0.4324, "step": 1820 }, { "epoch": 1.18, "learning_rate": 1.3878327819303121e-05, "loss": 0.4285, "step": 1821 }, { "epoch": 1.18, "learning_rate": 1.3871897703612658e-05, "loss": 0.4259, "step": 1822 }, { "epoch": 1.18, "learning_rate": 1.3865465704156696e-05, "loss": 0.4444, "step": 1823 }, { "epoch": 1.18, "learning_rate": 1.3859031824064543e-05, "loss": 0.4103, "step": 1824 }, { "epoch": 1.18, "learning_rate": 1.3852596066466437e-05, "loss": 0.433, "step": 1825 }, { "epoch": 1.18, "learning_rate": 1.3846158434493507e-05, "loss": 0.4573, "step": 1826 }, { "epoch": 1.18, "learning_rate": 1.3839718931277814e-05, "loss": 0.4033, "step": 1827 }, { "epoch": 1.18, "learning_rate": 1.3833277559952323e-05, "loss": 0.4026, "step": 1828 }, { "epoch": 1.18, "learning_rate": 1.3826834323650899e-05, "loss": 0.4317, "step": 1829 }, { "epoch": 1.18, "learning_rate": 1.3820389225508327e-05, "loss": 0.4102, "step": 1830 }, { "epoch": 1.18, "learning_rate": 1.381394226866029e-05, "loss": 0.4184, "step": 1831 }, { "epoch": 1.18, "learning_rate": 1.380749345624338e-05, "loss": 0.4288, "step": 1832 }, { "epoch": 1.18, "learning_rate": 1.3801042791395084e-05, "loss": 0.4268, "step": 1833 }, { "epoch": 1.18, "learning_rate": 1.3794590277253803e-05, "loss": 0.4421, "step": 1834 }, { "epoch": 1.18, "learning_rate": 1.3788135916958827e-05, "loss": 0.4318, "step": 1835 }, { "epoch": 1.19, "learning_rate": 1.3781679713650349e-05, "loss": 0.4157, "step": 1836 }, { "epoch": 1.19, "learning_rate": 1.3775221670469457e-05, "loss": 0.4109, "step": 1837 }, { "epoch": 1.19, "learning_rate": 1.3768761790558134e-05, "loss": 0.4314, "step": 1838 }, { "epoch": 1.19, "learning_rate": 1.3762300077059258e-05, "loss": 0.4345, "step": 1839 }, { "epoch": 1.19, "learning_rate": 1.3755836533116597e-05, "loss": 0.4199, "step": 1840 }, { "epoch": 1.19, "learning_rate": 1.3749371161874813e-05, "loss": 0.4242, "step": 1841 }, { "epoch": 1.19, "learning_rate": 1.374290396647945e-05, "loss": 0.445, "step": 1842 }, { "epoch": 1.19, "learning_rate": 1.373643495007695e-05, "loss": 0.4404, "step": 1843 }, { "epoch": 1.19, "learning_rate": 1.3729964115814636e-05, "loss": 0.4236, "step": 1844 }, { "epoch": 1.19, "learning_rate": 1.372349146684071e-05, "loss": 0.4211, "step": 1845 }, { "epoch": 1.19, "learning_rate": 1.371701700630426e-05, "loss": 0.4167, "step": 1846 }, { "epoch": 1.19, "learning_rate": 1.3710540737355265e-05, "loss": 0.4061, "step": 1847 }, { "epoch": 1.19, "learning_rate": 1.3704062663144569e-05, "loss": 0.4296, "step": 1848 }, { "epoch": 1.19, "learning_rate": 1.3697582786823902e-05, "loss": 0.4353, "step": 1849 }, { "epoch": 1.19, "learning_rate": 1.3691101111545873e-05, "loss": 0.4424, "step": 1850 }, { "epoch": 1.2, "learning_rate": 1.368461764046396e-05, "loss": 0.4295, "step": 1851 }, { "epoch": 1.2, "learning_rate": 1.3678132376732518e-05, "loss": 0.4576, "step": 1852 }, { "epoch": 1.2, "learning_rate": 1.3671645323506776e-05, "loss": 0.417, "step": 1853 }, { "epoch": 1.2, "learning_rate": 1.3665156483942834e-05, "loss": 0.4234, "step": 1854 }, { "epoch": 1.2, "learning_rate": 1.3658665861197653e-05, "loss": 0.4353, "step": 1855 }, { "epoch": 1.2, "learning_rate": 1.3652173458429068e-05, "loss": 0.4433, "step": 1856 }, { "epoch": 1.2, "learning_rate": 1.3645679278795783e-05, "loss": 0.4451, "step": 1857 }, { "epoch": 1.2, "learning_rate": 1.363918332545736e-05, "loss": 0.4224, "step": 1858 }, { "epoch": 1.2, "learning_rate": 1.3632685601574225e-05, "loss": 0.4191, "step": 1859 }, { "epoch": 1.2, "learning_rate": 1.3626186110307673e-05, "loss": 0.4235, "step": 1860 }, { "epoch": 1.2, "learning_rate": 1.3619684854819845e-05, "loss": 0.418, "step": 1861 }, { "epoch": 1.2, "learning_rate": 1.3613181838273758e-05, "loss": 0.439, "step": 1862 }, { "epoch": 1.2, "learning_rate": 1.3606677063833269e-05, "loss": 0.4185, "step": 1863 }, { "epoch": 1.2, "learning_rate": 1.3600170534663097e-05, "loss": 0.4339, "step": 1864 }, { "epoch": 1.2, "learning_rate": 1.3593662253928822e-05, "loss": 0.4443, "step": 1865 }, { "epoch": 1.2, "learning_rate": 1.358715222479686e-05, "loss": 0.4144, "step": 1866 }, { "epoch": 1.21, "learning_rate": 1.3580640450434494e-05, "loss": 0.4302, "step": 1867 }, { "epoch": 1.21, "learning_rate": 1.3574126934009843e-05, "loss": 0.4452, "step": 1868 }, { "epoch": 1.21, "learning_rate": 1.3567611678691888e-05, "loss": 0.4109, "step": 1869 }, { "epoch": 1.21, "learning_rate": 1.356109468765044e-05, "loss": 0.4378, "step": 1870 }, { "epoch": 1.21, "learning_rate": 1.3554575964056165e-05, "loss": 0.4331, "step": 1871 }, { "epoch": 1.21, "learning_rate": 1.3548055511080568e-05, "loss": 0.4448, "step": 1872 }, { "epoch": 1.21, "learning_rate": 1.3541533331895997e-05, "loss": 0.4383, "step": 1873 }, { "epoch": 1.21, "learning_rate": 1.3535009429675641e-05, "loss": 0.4276, "step": 1874 }, { "epoch": 1.21, "learning_rate": 1.3528483807593522e-05, "loss": 0.4409, "step": 1875 }, { "epoch": 1.21, "learning_rate": 1.3521956468824505e-05, "loss": 0.4052, "step": 1876 }, { "epoch": 1.21, "learning_rate": 1.3515427416544283e-05, "loss": 0.4433, "step": 1877 }, { "epoch": 1.21, "learning_rate": 1.3508896653929392e-05, "loss": 0.4254, "step": 1878 }, { "epoch": 1.21, "learning_rate": 1.3502364184157195e-05, "loss": 0.4589, "step": 1879 }, { "epoch": 1.21, "learning_rate": 1.3495830010405884e-05, "loss": 0.4225, "step": 1880 }, { "epoch": 1.21, "learning_rate": 1.3489294135854481e-05, "loss": 0.423, "step": 1881 }, { "epoch": 1.22, "learning_rate": 1.3482756563682837e-05, "loss": 0.4398, "step": 1882 }, { "epoch": 1.22, "learning_rate": 1.347621729707163e-05, "loss": 0.4085, "step": 1883 }, { "epoch": 1.22, "learning_rate": 1.346967633920236e-05, "loss": 0.4178, "step": 1884 }, { "epoch": 1.22, "learning_rate": 1.3463133693257345e-05, "loss": 0.4221, "step": 1885 }, { "epoch": 1.22, "learning_rate": 1.3456589362419739e-05, "loss": 0.4123, "step": 1886 }, { "epoch": 1.22, "learning_rate": 1.3450043349873499e-05, "loss": 0.4108, "step": 1887 }, { "epoch": 1.22, "learning_rate": 1.344349565880341e-05, "loss": 0.4285, "step": 1888 }, { "epoch": 1.22, "learning_rate": 1.3436946292395075e-05, "loss": 0.4486, "step": 1889 }, { "epoch": 1.22, "learning_rate": 1.3430395253834902e-05, "loss": 0.4061, "step": 1890 }, { "epoch": 1.22, "learning_rate": 1.3423842546310123e-05, "loss": 0.438, "step": 1891 }, { "epoch": 1.22, "learning_rate": 1.3417288173008778e-05, "loss": 0.4321, "step": 1892 }, { "epoch": 1.22, "learning_rate": 1.3410732137119716e-05, "loss": 0.4401, "step": 1893 }, { "epoch": 1.22, "learning_rate": 1.3404174441832592e-05, "loss": 0.4331, "step": 1894 }, { "epoch": 1.22, "learning_rate": 1.3397615090337881e-05, "loss": 0.4364, "step": 1895 }, { "epoch": 1.22, "learning_rate": 1.339105408582685e-05, "loss": 0.4292, "step": 1896 }, { "epoch": 1.22, "learning_rate": 1.338449143149158e-05, "loss": 0.4089, "step": 1897 }, { "epoch": 1.23, "learning_rate": 1.3377927130524943e-05, "loss": 0.4108, "step": 1898 }, { "epoch": 1.23, "learning_rate": 1.3371361186120627e-05, "loss": 0.4358, "step": 1899 }, { "epoch": 1.23, "learning_rate": 1.3364793601473105e-05, "loss": 0.4161, "step": 1900 }, { "epoch": 1.23, "learning_rate": 1.335822437977766e-05, "loss": 0.4309, "step": 1901 }, { "epoch": 1.23, "learning_rate": 1.3351653524230366e-05, "loss": 0.436, "step": 1902 }, { "epoch": 1.23, "learning_rate": 1.3345081038028086e-05, "loss": 0.4152, "step": 1903 }, { "epoch": 1.23, "learning_rate": 1.3338506924368494e-05, "loss": 0.4258, "step": 1904 }, { "epoch": 1.23, "learning_rate": 1.3331931186450038e-05, "loss": 0.442, "step": 1905 }, { "epoch": 1.23, "learning_rate": 1.332535382747196e-05, "loss": 0.4513, "step": 1906 }, { "epoch": 1.23, "learning_rate": 1.3318774850634297e-05, "loss": 0.4156, "step": 1907 }, { "epoch": 1.23, "learning_rate": 1.331219425913787e-05, "loss": 0.4455, "step": 1908 }, { "epoch": 1.23, "learning_rate": 1.3305612056184285e-05, "loss": 0.4265, "step": 1909 }, { "epoch": 1.23, "learning_rate": 1.3299028244975929e-05, "loss": 0.4418, "step": 1910 }, { "epoch": 1.23, "learning_rate": 1.3292442828715975e-05, "loss": 0.4507, "step": 1911 }, { "epoch": 1.23, "learning_rate": 1.3285855810608377e-05, "loss": 0.4064, "step": 1912 }, { "epoch": 1.24, "learning_rate": 1.3279267193857872e-05, "loss": 0.4164, "step": 1913 }, { "epoch": 1.24, "learning_rate": 1.3272676981669968e-05, "loss": 0.4185, "step": 1914 }, { "epoch": 1.24, "learning_rate": 1.3266085177250952e-05, "loss": 0.4287, "step": 1915 }, { "epoch": 1.24, "learning_rate": 1.325949178380788e-05, "loss": 0.4252, "step": 1916 }, { "epoch": 1.24, "learning_rate": 1.3252896804548594e-05, "loss": 0.4358, "step": 1917 }, { "epoch": 1.24, "learning_rate": 1.3246300242681698e-05, "loss": 0.4441, "step": 1918 }, { "epoch": 1.24, "learning_rate": 1.3239702101416568e-05, "loss": 0.4322, "step": 1919 }, { "epoch": 1.24, "learning_rate": 1.3233102383963341e-05, "loss": 0.4539, "step": 1920 }, { "epoch": 1.24, "learning_rate": 1.322650109353294e-05, "loss": 0.423, "step": 1921 }, { "epoch": 1.24, "learning_rate": 1.3219898233337036e-05, "loss": 0.4372, "step": 1922 }, { "epoch": 1.24, "learning_rate": 1.3213293806588068e-05, "loss": 0.4137, "step": 1923 }, { "epoch": 1.24, "learning_rate": 1.3206687816499242e-05, "loss": 0.4231, "step": 1924 }, { "epoch": 1.24, "learning_rate": 1.3200080266284519e-05, "loss": 0.4355, "step": 1925 }, { "epoch": 1.24, "learning_rate": 1.3193471159158621e-05, "loss": 0.4374, "step": 1926 }, { "epoch": 1.24, "learning_rate": 1.3186860498337028e-05, "loss": 0.4269, "step": 1927 }, { "epoch": 1.24, "learning_rate": 1.3180248287035977e-05, "loss": 0.4109, "step": 1928 }, { "epoch": 1.25, "learning_rate": 1.3173634528472452e-05, "loss": 0.4227, "step": 1929 }, { "epoch": 1.25, "learning_rate": 1.3167019225864203e-05, "loss": 0.4282, "step": 1930 }, { "epoch": 1.25, "learning_rate": 1.316040238242972e-05, "loss": 0.437, "step": 1931 }, { "epoch": 1.25, "learning_rate": 1.3153784001388249e-05, "loss": 0.4206, "step": 1932 }, { "epoch": 1.25, "learning_rate": 1.3147164085959778e-05, "loss": 0.4319, "step": 1933 }, { "epoch": 1.25, "learning_rate": 1.3140542639365047e-05, "loss": 0.4193, "step": 1934 }, { "epoch": 1.25, "learning_rate": 1.3133919664825539e-05, "loss": 0.4291, "step": 1935 }, { "epoch": 1.25, "learning_rate": 1.3127295165563476e-05, "loss": 0.4598, "step": 1936 }, { "epoch": 1.25, "learning_rate": 1.3120669144801834e-05, "loss": 0.4355, "step": 1937 }, { "epoch": 1.25, "learning_rate": 1.3114041605764319e-05, "loss": 0.4293, "step": 1938 }, { "epoch": 1.25, "learning_rate": 1.3107412551675373e-05, "loss": 0.4258, "step": 1939 }, { "epoch": 1.25, "learning_rate": 1.3100781985760188e-05, "loss": 0.4374, "step": 1940 }, { "epoch": 1.25, "learning_rate": 1.3094149911244678e-05, "loss": 0.4112, "step": 1941 }, { "epoch": 1.25, "learning_rate": 1.3087516331355501e-05, "loss": 0.4174, "step": 1942 }, { "epoch": 1.25, "learning_rate": 1.3080881249320038e-05, "loss": 0.4202, "step": 1943 }, { "epoch": 1.26, "learning_rate": 1.3074244668366412e-05, "loss": 0.4491, "step": 1944 }, { "epoch": 1.26, "learning_rate": 1.3067606591723463e-05, "loss": 0.4179, "step": 1945 }, { "epoch": 1.26, "learning_rate": 1.3060967022620766e-05, "loss": 0.4171, "step": 1946 }, { "epoch": 1.26, "learning_rate": 1.3054325964288626e-05, "loss": 0.4096, "step": 1947 }, { "epoch": 1.26, "learning_rate": 1.3047683419958062e-05, "loss": 0.4226, "step": 1948 }, { "epoch": 1.26, "learning_rate": 1.3041039392860824e-05, "loss": 0.4188, "step": 1949 }, { "epoch": 1.26, "learning_rate": 1.3034393886229381e-05, "loss": 0.4414, "step": 1950 }, { "epoch": 1.26, "learning_rate": 1.3027746903296924e-05, "loss": 0.4453, "step": 1951 }, { "epoch": 1.26, "learning_rate": 1.3021098447297358e-05, "loss": 0.4093, "step": 1952 }, { "epoch": 1.26, "learning_rate": 1.3014448521465304e-05, "loss": 0.4308, "step": 1953 }, { "epoch": 1.26, "learning_rate": 1.3007797129036104e-05, "loss": 0.4244, "step": 1954 }, { "epoch": 1.26, "learning_rate": 1.3001144273245807e-05, "loss": 0.4092, "step": 1955 }, { "epoch": 1.26, "learning_rate": 1.2994489957331183e-05, "loss": 0.4297, "step": 1956 }, { "epoch": 1.26, "learning_rate": 1.2987834184529701e-05, "loss": 0.4276, "step": 1957 }, { "epoch": 1.26, "learning_rate": 1.2981176958079549e-05, "loss": 0.444, "step": 1958 }, { "epoch": 1.26, "learning_rate": 1.2974518281219614e-05, "loss": 0.436, "step": 1959 }, { "epoch": 1.27, "learning_rate": 1.2967858157189495e-05, "loss": 0.4308, "step": 1960 }, { "epoch": 1.27, "learning_rate": 1.296119658922949e-05, "loss": 0.4268, "step": 1961 }, { "epoch": 1.27, "learning_rate": 1.2954533580580603e-05, "loss": 0.4543, "step": 1962 }, { "epoch": 1.27, "learning_rate": 1.2947869134484541e-05, "loss": 0.4184, "step": 1963 }, { "epoch": 1.27, "learning_rate": 1.29412032541837e-05, "loss": 0.4278, "step": 1964 }, { "epoch": 1.27, "learning_rate": 1.2934535942921189e-05, "loss": 0.4362, "step": 1965 }, { "epoch": 1.27, "learning_rate": 1.29278672039408e-05, "loss": 0.4522, "step": 1966 }, { "epoch": 1.27, "learning_rate": 1.2921197040487027e-05, "loss": 0.4205, "step": 1967 }, { "epoch": 1.27, "learning_rate": 1.2914525455805056e-05, "loss": 0.4325, "step": 1968 }, { "epoch": 1.27, "learning_rate": 1.290785245314076e-05, "loss": 0.4154, "step": 1969 }, { "epoch": 1.27, "learning_rate": 1.2901178035740709e-05, "loss": 0.4341, "step": 1970 }, { "epoch": 1.27, "learning_rate": 1.2894502206852153e-05, "loss": 0.4174, "step": 1971 }, { "epoch": 1.27, "learning_rate": 1.2887824969723035e-05, "loss": 0.4308, "step": 1972 }, { "epoch": 1.27, "learning_rate": 1.2881146327601981e-05, "loss": 0.4416, "step": 1973 }, { "epoch": 1.27, "learning_rate": 1.2874466283738303e-05, "loss": 0.3959, "step": 1974 }, { "epoch": 1.28, "learning_rate": 1.2867784841381994e-05, "loss": 0.433, "step": 1975 }, { "epoch": 1.28, "learning_rate": 1.2861102003783722e-05, "loss": 0.418, "step": 1976 }, { "epoch": 1.28, "learning_rate": 1.2854417774194841e-05, "loss": 0.4286, "step": 1977 }, { "epoch": 1.28, "learning_rate": 1.284773215586738e-05, "loss": 0.4246, "step": 1978 }, { "epoch": 1.28, "learning_rate": 1.284104515205404e-05, "loss": 0.4258, "step": 1979 }, { "epoch": 1.28, "learning_rate": 1.2834356766008198e-05, "loss": 0.4398, "step": 1980 }, { "epoch": 1.28, "learning_rate": 1.2827667000983907e-05, "loss": 0.4423, "step": 1981 }, { "epoch": 1.28, "learning_rate": 1.2820975860235892e-05, "loss": 0.435, "step": 1982 }, { "epoch": 1.28, "learning_rate": 1.2814283347019538e-05, "loss": 0.4196, "step": 1983 }, { "epoch": 1.28, "learning_rate": 1.2807589464590908e-05, "loss": 0.4563, "step": 1984 }, { "epoch": 1.28, "learning_rate": 1.2800894216206725e-05, "loss": 0.4323, "step": 1985 }, { "epoch": 1.28, "learning_rate": 1.2794197605124375e-05, "loss": 0.4242, "step": 1986 }, { "epoch": 1.28, "learning_rate": 1.2787499634601914e-05, "loss": 0.4209, "step": 1987 }, { "epoch": 1.28, "learning_rate": 1.2780800307898057e-05, "loss": 0.4267, "step": 1988 }, { "epoch": 1.28, "learning_rate": 1.2774099628272171e-05, "loss": 0.4221, "step": 1989 }, { "epoch": 1.28, "learning_rate": 1.2767397598984293e-05, "loss": 0.428, "step": 1990 }, { "epoch": 1.29, "learning_rate": 1.2760694223295112e-05, "loss": 0.4151, "step": 1991 }, { "epoch": 1.29, "learning_rate": 1.2753989504465967e-05, "loss": 0.4498, "step": 1992 }, { "epoch": 1.29, "learning_rate": 1.2747283445758858e-05, "loss": 0.4426, "step": 1993 }, { "epoch": 1.29, "learning_rate": 1.2740576050436433e-05, "loss": 0.4274, "step": 1994 }, { "epoch": 1.29, "learning_rate": 1.2733867321761992e-05, "loss": 0.4081, "step": 1995 }, { "epoch": 1.29, "learning_rate": 1.2727157262999481e-05, "loss": 0.4093, "step": 1996 }, { "epoch": 1.29, "learning_rate": 1.2720445877413496e-05, "loss": 0.4261, "step": 1997 }, { "epoch": 1.29, "learning_rate": 1.2713733168269275e-05, "loss": 0.4435, "step": 1998 }, { "epoch": 1.29, "learning_rate": 1.2707019138832704e-05, "loss": 0.42, "step": 1999 }, { "epoch": 1.29, "learning_rate": 1.270030379237031e-05, "loss": 0.4532, "step": 2000 }, { "epoch": 1.29, "learning_rate": 1.269358713214926e-05, "loss": 0.4308, "step": 2001 }, { "epoch": 1.29, "learning_rate": 1.2686869161437364e-05, "loss": 0.4175, "step": 2002 }, { "epoch": 1.29, "learning_rate": 1.2680149883503062e-05, "loss": 0.4339, "step": 2003 }, { "epoch": 1.29, "learning_rate": 1.2673429301615431e-05, "loss": 0.4274, "step": 2004 }, { "epoch": 1.29, "learning_rate": 1.2666707419044193e-05, "loss": 0.4081, "step": 2005 }, { "epoch": 1.3, "learning_rate": 1.2659984239059693e-05, "loss": 0.4311, "step": 2006 }, { "epoch": 1.3, "learning_rate": 1.2653259764932904e-05, "loss": 0.4371, "step": 2007 }, { "epoch": 1.3, "learning_rate": 1.2646533999935442e-05, "loss": 0.4438, "step": 2008 }, { "epoch": 1.3, "learning_rate": 1.263980694733954e-05, "loss": 0.4429, "step": 2009 }, { "epoch": 1.3, "learning_rate": 1.2633078610418062e-05, "loss": 0.4403, "step": 2010 }, { "epoch": 1.3, "learning_rate": 1.262634899244449e-05, "loss": 0.4135, "step": 2011 }, { "epoch": 1.3, "learning_rate": 1.2619618096692942e-05, "loss": 0.4291, "step": 2012 }, { "epoch": 1.3, "learning_rate": 1.2612885926438147e-05, "loss": 0.4358, "step": 2013 }, { "epoch": 1.3, "learning_rate": 1.2606152484955458e-05, "loss": 0.4297, "step": 2014 }, { "epoch": 1.3, "learning_rate": 1.2599417775520849e-05, "loss": 0.427, "step": 2015 }, { "epoch": 1.3, "learning_rate": 1.25926818014109e-05, "loss": 0.4376, "step": 2016 }, { "epoch": 1.3, "learning_rate": 1.2585944565902823e-05, "loss": 0.4364, "step": 2017 }, { "epoch": 1.3, "learning_rate": 1.257920607227443e-05, "loss": 0.4074, "step": 2018 }, { "epoch": 1.3, "learning_rate": 1.2572466323804154e-05, "loss": 0.3826, "step": 2019 }, { "epoch": 1.3, "learning_rate": 1.256572532377103e-05, "loss": 0.4122, "step": 2020 }, { "epoch": 1.3, "learning_rate": 1.2558983075454708e-05, "loss": 0.4045, "step": 2021 }, { "epoch": 1.31, "learning_rate": 1.2552239582135446e-05, "loss": 0.4306, "step": 2022 }, { "epoch": 1.31, "learning_rate": 1.25454948470941e-05, "loss": 0.4083, "step": 2023 }, { "epoch": 1.31, "learning_rate": 1.253874887361214e-05, "loss": 0.4269, "step": 2024 }, { "epoch": 1.31, "learning_rate": 1.2532001664971634e-05, "loss": 0.4528, "step": 2025 }, { "epoch": 1.31, "learning_rate": 1.2525253224455249e-05, "loss": 0.4374, "step": 2026 }, { "epoch": 1.31, "learning_rate": 1.2518503555346258e-05, "loss": 0.4297, "step": 2027 }, { "epoch": 1.31, "learning_rate": 1.2511752660928523e-05, "loss": 0.4236, "step": 2028 }, { "epoch": 1.31, "learning_rate": 1.2505000544486507e-05, "loss": 0.4148, "step": 2029 }, { "epoch": 1.31, "learning_rate": 1.2498247209305267e-05, "loss": 0.4376, "step": 2030 }, { "epoch": 1.31, "learning_rate": 1.2491492658670452e-05, "loss": 0.4091, "step": 2031 }, { "epoch": 1.31, "learning_rate": 1.2484736895868306e-05, "loss": 0.4188, "step": 2032 }, { "epoch": 1.31, "learning_rate": 1.2477979924185655e-05, "loss": 0.4161, "step": 2033 }, { "epoch": 1.31, "learning_rate": 1.2471221746909923e-05, "loss": 0.4444, "step": 2034 }, { "epoch": 1.31, "learning_rate": 1.2464462367329114e-05, "loss": 0.4501, "step": 2035 }, { "epoch": 1.31, "learning_rate": 1.2457701788731812e-05, "loss": 0.4092, "step": 2036 }, { "epoch": 1.32, "learning_rate": 1.2450940014407204e-05, "loss": 0.4386, "step": 2037 }, { "epoch": 1.32, "learning_rate": 1.2444177047645036e-05, "loss": 0.4299, "step": 2038 }, { "epoch": 1.32, "learning_rate": 1.2437412891735648e-05, "loss": 0.4215, "step": 2039 }, { "epoch": 1.32, "learning_rate": 1.2430647549969949e-05, "loss": 0.4457, "step": 2040 }, { "epoch": 1.32, "learning_rate": 1.2423881025639438e-05, "loss": 0.4466, "step": 2041 }, { "epoch": 1.32, "learning_rate": 1.2417113322036172e-05, "loss": 0.4283, "step": 2042 }, { "epoch": 1.32, "learning_rate": 1.2410344442452797e-05, "loss": 0.4232, "step": 2043 }, { "epoch": 1.32, "learning_rate": 1.2403574390182529e-05, "loss": 0.4146, "step": 2044 }, { "epoch": 1.32, "learning_rate": 1.2396803168519147e-05, "loss": 0.4215, "step": 2045 }, { "epoch": 1.32, "learning_rate": 1.2390030780757e-05, "loss": 0.4293, "step": 2046 }, { "epoch": 1.32, "learning_rate": 1.2383257230191013e-05, "loss": 0.4346, "step": 2047 }, { "epoch": 1.32, "learning_rate": 1.2376482520116666e-05, "loss": 0.426, "step": 2048 }, { "epoch": 1.32, "learning_rate": 1.2369706653830012e-05, "loss": 0.4196, "step": 2049 }, { "epoch": 1.32, "learning_rate": 1.2362929634627663e-05, "loss": 0.4538, "step": 2050 }, { "epoch": 1.32, "learning_rate": 1.2356151465806785e-05, "loss": 0.426, "step": 2051 }, { "epoch": 1.32, "learning_rate": 1.2349372150665117e-05, "loss": 0.4342, "step": 2052 }, { "epoch": 1.33, "learning_rate": 1.2342591692500949e-05, "loss": 0.4327, "step": 2053 }, { "epoch": 1.33, "learning_rate": 1.2335810094613123e-05, "loss": 0.4106, "step": 2054 }, { "epoch": 1.33, "learning_rate": 1.2329027360301038e-05, "loss": 0.4295, "step": 2055 }, { "epoch": 1.33, "learning_rate": 1.2322243492864651e-05, "loss": 0.4306, "step": 2056 }, { "epoch": 1.33, "learning_rate": 1.2315458495604467e-05, "loss": 0.4453, "step": 2057 }, { "epoch": 1.33, "learning_rate": 1.2308672371821532e-05, "loss": 0.4387, "step": 2058 }, { "epoch": 1.33, "learning_rate": 1.230188512481746e-05, "loss": 0.4147, "step": 2059 }, { "epoch": 1.33, "learning_rate": 1.2295096757894389e-05, "loss": 0.4233, "step": 2060 }, { "epoch": 1.33, "learning_rate": 1.2288307274355018e-05, "loss": 0.4111, "step": 2061 }, { "epoch": 1.33, "learning_rate": 1.2281516677502586e-05, "loss": 0.4147, "step": 2062 }, { "epoch": 1.33, "learning_rate": 1.2274724970640865e-05, "loss": 0.4381, "step": 2063 }, { "epoch": 1.33, "learning_rate": 1.2267932157074178e-05, "loss": 0.4401, "step": 2064 }, { "epoch": 1.33, "learning_rate": 1.2261138240107382e-05, "loss": 0.4494, "step": 2065 }, { "epoch": 1.33, "learning_rate": 1.225434322304587e-05, "loss": 0.4307, "step": 2066 }, { "epoch": 1.33, "learning_rate": 1.2247547109195567e-05, "loss": 0.4288, "step": 2067 }, { "epoch": 1.34, "learning_rate": 1.224074990186294e-05, "loss": 0.4196, "step": 2068 }, { "epoch": 1.34, "learning_rate": 1.2233951604354983e-05, "loss": 0.4536, "step": 2069 }, { "epoch": 1.34, "learning_rate": 1.2227152219979224e-05, "loss": 0.429, "step": 2070 }, { "epoch": 1.34, "learning_rate": 1.2220351752043708e-05, "loss": 0.4448, "step": 2071 }, { "epoch": 1.34, "learning_rate": 1.2213550203857025e-05, "loss": 0.4361, "step": 2072 }, { "epoch": 1.34, "learning_rate": 1.2206747578728275e-05, "loss": 0.4393, "step": 2073 }, { "epoch": 1.34, "learning_rate": 1.2199943879967092e-05, "loss": 0.4367, "step": 2074 }, { "epoch": 1.34, "learning_rate": 1.2193139110883626e-05, "loss": 0.4207, "step": 2075 }, { "epoch": 1.34, "learning_rate": 1.2186333274788558e-05, "loss": 0.419, "step": 2076 }, { "epoch": 1.34, "learning_rate": 1.2179526374993068e-05, "loss": 0.425, "step": 2077 }, { "epoch": 1.34, "learning_rate": 1.2172718414808877e-05, "loss": 0.4317, "step": 2078 }, { "epoch": 1.34, "learning_rate": 1.2165909397548208e-05, "loss": 0.4431, "step": 2079 }, { "epoch": 1.34, "learning_rate": 1.21590993265238e-05, "loss": 0.4372, "step": 2080 }, { "epoch": 1.34, "learning_rate": 1.2152288205048909e-05, "loss": 0.4135, "step": 2081 }, { "epoch": 1.34, "learning_rate": 1.2145476036437294e-05, "loss": 0.4288, "step": 2082 }, { "epoch": 1.34, "learning_rate": 1.2138662824003232e-05, "loss": 0.4395, "step": 2083 }, { "epoch": 1.35, "learning_rate": 1.2131848571061501e-05, "loss": 0.4621, "step": 2084 }, { "epoch": 1.35, "learning_rate": 1.2125033280927395e-05, "loss": 0.4232, "step": 2085 }, { "epoch": 1.35, "learning_rate": 1.21182169569167e-05, "loss": 0.4043, "step": 2086 }, { "epoch": 1.35, "learning_rate": 1.2111399602345715e-05, "loss": 0.4035, "step": 2087 }, { "epoch": 1.35, "learning_rate": 1.2104581220531237e-05, "loss": 0.4058, "step": 2088 }, { "epoch": 1.35, "learning_rate": 1.2097761814790561e-05, "loss": 0.4058, "step": 2089 }, { "epoch": 1.35, "learning_rate": 1.2090941388441482e-05, "loss": 0.4194, "step": 2090 }, { "epoch": 1.35, "learning_rate": 1.2084119944802288e-05, "loss": 0.4188, "step": 2091 }, { "epoch": 1.35, "learning_rate": 1.2077297487191771e-05, "loss": 0.4258, "step": 2092 }, { "epoch": 1.35, "learning_rate": 1.2070474018929204e-05, "loss": 0.4232, "step": 2093 }, { "epoch": 1.35, "learning_rate": 1.2063649543334364e-05, "loss": 0.4352, "step": 2094 }, { "epoch": 1.35, "learning_rate": 1.2056824063727508e-05, "loss": 0.4206, "step": 2095 }, { "epoch": 1.35, "learning_rate": 1.2049997583429389e-05, "loss": 0.4601, "step": 2096 }, { "epoch": 1.35, "learning_rate": 1.2043170105761242e-05, "loss": 0.4073, "step": 2097 }, { "epoch": 1.35, "learning_rate": 1.2036341634044785e-05, "loss": 0.4246, "step": 2098 }, { "epoch": 1.36, "learning_rate": 1.202951217160223e-05, "loss": 0.4149, "step": 2099 }, { "epoch": 1.36, "learning_rate": 1.202268172175626e-05, "loss": 0.4096, "step": 2100 }, { "epoch": 1.36, "learning_rate": 1.2015850287830045e-05, "loss": 0.461, "step": 2101 }, { "epoch": 1.36, "learning_rate": 1.200901787314723e-05, "loss": 0.4246, "step": 2102 }, { "epoch": 1.36, "learning_rate": 1.2002184481031938e-05, "loss": 0.4161, "step": 2103 }, { "epoch": 1.36, "learning_rate": 1.1995350114808772e-05, "loss": 0.4206, "step": 2104 }, { "epoch": 1.36, "learning_rate": 1.1988514777802802e-05, "loss": 0.4184, "step": 2105 }, { "epoch": 1.36, "learning_rate": 1.1981678473339576e-05, "loss": 0.4309, "step": 2106 }, { "epoch": 1.36, "learning_rate": 1.1974841204745107e-05, "loss": 0.4245, "step": 2107 }, { "epoch": 1.36, "learning_rate": 1.1968002975345882e-05, "loss": 0.4242, "step": 2108 }, { "epoch": 1.36, "learning_rate": 1.1961163788468854e-05, "loss": 0.4388, "step": 2109 }, { "epoch": 1.36, "learning_rate": 1.1954323647441439e-05, "loss": 0.442, "step": 2110 }, { "epoch": 1.36, "learning_rate": 1.1947482555591525e-05, "loss": 0.4521, "step": 2111 }, { "epoch": 1.36, "learning_rate": 1.194064051624745e-05, "loss": 0.4427, "step": 2112 }, { "epoch": 1.36, "learning_rate": 1.1933797532738029e-05, "loss": 0.4253, "step": 2113 }, { "epoch": 1.36, "learning_rate": 1.1926953608392522e-05, "loss": 0.4489, "step": 2114 }, { "epoch": 1.37, "learning_rate": 1.1920108746540655e-05, "loss": 0.427, "step": 2115 }, { "epoch": 1.37, "learning_rate": 1.1913262950512605e-05, "loss": 0.441, "step": 2116 }, { "epoch": 1.37, "learning_rate": 1.1906416223639008e-05, "loss": 0.4593, "step": 2117 }, { "epoch": 1.37, "learning_rate": 1.1899568569250951e-05, "loss": 0.4321, "step": 2118 }, { "epoch": 1.37, "learning_rate": 1.1892719990679968e-05, "loss": 0.4235, "step": 2119 }, { "epoch": 1.37, "learning_rate": 1.1885870491258054e-05, "loss": 0.4341, "step": 2120 }, { "epoch": 1.37, "learning_rate": 1.1879020074317641e-05, "loss": 0.4356, "step": 2121 }, { "epoch": 1.37, "learning_rate": 1.1872168743191613e-05, "loss": 0.4333, "step": 2122 }, { "epoch": 1.37, "learning_rate": 1.1865316501213293e-05, "loss": 0.4352, "step": 2123 }, { "epoch": 1.37, "learning_rate": 1.185846335171645e-05, "loss": 0.4127, "step": 2124 }, { "epoch": 1.37, "learning_rate": 1.1851609298035305e-05, "loss": 0.4274, "step": 2125 }, { "epoch": 1.37, "learning_rate": 1.1844754343504503e-05, "loss": 0.4287, "step": 2126 }, { "epoch": 1.37, "learning_rate": 1.1837898491459135e-05, "loss": 0.4248, "step": 2127 }, { "epoch": 1.37, "learning_rate": 1.1831041745234728e-05, "loss": 0.4318, "step": 2128 }, { "epoch": 1.37, "learning_rate": 1.1824184108167238e-05, "loss": 0.429, "step": 2129 }, { "epoch": 1.38, "learning_rate": 1.181732558359307e-05, "loss": 0.4283, "step": 2130 }, { "epoch": 1.38, "learning_rate": 1.1810466174849044e-05, "loss": 0.4444, "step": 2131 }, { "epoch": 1.38, "learning_rate": 1.180360588527242e-05, "loss": 0.4356, "step": 2132 }, { "epoch": 1.38, "learning_rate": 1.1796744718200881e-05, "loss": 0.4079, "step": 2133 }, { "epoch": 1.38, "learning_rate": 1.1789882676972541e-05, "loss": 0.4299, "step": 2134 }, { "epoch": 1.38, "learning_rate": 1.1783019764925938e-05, "loss": 0.4073, "step": 2135 }, { "epoch": 1.38, "learning_rate": 1.177615598540003e-05, "loss": 0.4423, "step": 2136 }, { "epoch": 1.38, "learning_rate": 1.1769291341734206e-05, "loss": 0.442, "step": 2137 }, { "epoch": 1.38, "learning_rate": 1.1762425837268263e-05, "loss": 0.4504, "step": 2138 }, { "epoch": 1.38, "learning_rate": 1.1755559475342431e-05, "loss": 0.4251, "step": 2139 }, { "epoch": 1.38, "learning_rate": 1.1748692259297347e-05, "loss": 0.4423, "step": 2140 }, { "epoch": 1.38, "learning_rate": 1.1741824192474065e-05, "loss": 0.4173, "step": 2141 }, { "epoch": 1.38, "learning_rate": 1.1734955278214057e-05, "loss": 0.4112, "step": 2142 }, { "epoch": 1.38, "learning_rate": 1.17280855198592e-05, "loss": 0.4119, "step": 2143 }, { "epoch": 1.38, "learning_rate": 1.172121492075179e-05, "loss": 0.4239, "step": 2144 }, { "epoch": 1.38, "learning_rate": 1.1714343484234528e-05, "loss": 0.4146, "step": 2145 }, { "epoch": 1.39, "learning_rate": 1.170747121365052e-05, "loss": 0.4141, "step": 2146 }, { "epoch": 1.39, "learning_rate": 1.1700598112343287e-05, "loss": 0.4253, "step": 2147 }, { "epoch": 1.39, "learning_rate": 1.169372418365674e-05, "loss": 0.4192, "step": 2148 }, { "epoch": 1.39, "learning_rate": 1.1686849430935204e-05, "loss": 0.4463, "step": 2149 }, { "epoch": 1.39, "learning_rate": 1.16799738575234e-05, "loss": 0.4097, "step": 2150 }, { "epoch": 1.39, "learning_rate": 1.1673097466766452e-05, "loss": 0.4295, "step": 2151 }, { "epoch": 1.39, "learning_rate": 1.1666220262009877e-05, "loss": 0.447, "step": 2152 }, { "epoch": 1.39, "learning_rate": 1.165934224659959e-05, "loss": 0.4461, "step": 2153 }, { "epoch": 1.39, "learning_rate": 1.1652463423881898e-05, "loss": 0.4251, "step": 2154 }, { "epoch": 1.39, "learning_rate": 1.1645583797203511e-05, "loss": 0.451, "step": 2155 }, { "epoch": 1.39, "learning_rate": 1.1638703369911517e-05, "loss": 0.438, "step": 2156 }, { "epoch": 1.39, "learning_rate": 1.1631822145353398e-05, "loss": 0.4396, "step": 2157 }, { "epoch": 1.39, "learning_rate": 1.1624940126877027e-05, "loss": 0.4546, "step": 2158 }, { "epoch": 1.39, "learning_rate": 1.161805731783066e-05, "loss": 0.4187, "step": 2159 }, { "epoch": 1.39, "learning_rate": 1.161117372156294e-05, "loss": 0.4226, "step": 2160 }, { "epoch": 1.4, "learning_rate": 1.1604289341422889e-05, "loss": 0.4361, "step": 2161 }, { "epoch": 1.4, "learning_rate": 1.1597404180759917e-05, "loss": 0.4258, "step": 2162 }, { "epoch": 1.4, "learning_rate": 1.1590518242923807e-05, "loss": 0.429, "step": 2163 }, { "epoch": 1.4, "learning_rate": 1.1583631531264723e-05, "loss": 0.4326, "step": 2164 }, { "epoch": 1.4, "learning_rate": 1.1576744049133209e-05, "loss": 0.4261, "step": 2165 }, { "epoch": 1.4, "learning_rate": 1.1569855799880174e-05, "loss": 0.4465, "step": 2166 }, { "epoch": 1.4, "learning_rate": 1.1562966786856916e-05, "loss": 0.4252, "step": 2167 }, { "epoch": 1.4, "learning_rate": 1.1556077013415084e-05, "loss": 0.4284, "step": 2168 }, { "epoch": 1.4, "learning_rate": 1.1549186482906715e-05, "loss": 0.4416, "step": 2169 }, { "epoch": 1.4, "learning_rate": 1.1542295198684206e-05, "loss": 0.4374, "step": 2170 }, { "epoch": 1.4, "learning_rate": 1.153540316410032e-05, "loss": 0.4483, "step": 2171 }, { "epoch": 1.4, "learning_rate": 1.152851038250819e-05, "loss": 0.425, "step": 2172 }, { "epoch": 1.4, "learning_rate": 1.152161685726131e-05, "loss": 0.4552, "step": 2173 }, { "epoch": 1.4, "learning_rate": 1.1514722591713529e-05, "loss": 0.4016, "step": 2174 }, { "epoch": 1.4, "learning_rate": 1.1507827589219072e-05, "loss": 0.4156, "step": 2175 }, { "epoch": 1.4, "learning_rate": 1.150093185313251e-05, "loss": 0.4351, "step": 2176 }, { "epoch": 1.41, "learning_rate": 1.149403538680877e-05, "loss": 0.4558, "step": 2177 }, { "epoch": 1.41, "learning_rate": 1.1487138193603142e-05, "loss": 0.4501, "step": 2178 }, { "epoch": 1.41, "learning_rate": 1.1480240276871264e-05, "loss": 0.465, "step": 2179 }, { "epoch": 1.41, "learning_rate": 1.147334163996913e-05, "loss": 0.4207, "step": 2180 }, { "epoch": 1.41, "learning_rate": 1.146644228625308e-05, "loss": 0.4066, "step": 2181 }, { "epoch": 1.41, "learning_rate": 1.1459542219079808e-05, "loss": 0.4499, "step": 2182 }, { "epoch": 1.41, "learning_rate": 1.1452641441806348e-05, "loss": 0.4082, "step": 2183 }, { "epoch": 1.41, "learning_rate": 1.1445739957790087e-05, "loss": 0.4345, "step": 2184 }, { "epoch": 1.41, "learning_rate": 1.1438837770388753e-05, "loss": 0.4425, "step": 2185 }, { "epoch": 1.41, "learning_rate": 1.1431934882960412e-05, "loss": 0.4226, "step": 2186 }, { "epoch": 1.41, "learning_rate": 1.1425031298863478e-05, "loss": 0.4112, "step": 2187 }, { "epoch": 1.41, "learning_rate": 1.14181270214567e-05, "loss": 0.4274, "step": 2188 }, { "epoch": 1.41, "learning_rate": 1.141122205409916e-05, "loss": 0.4209, "step": 2189 }, { "epoch": 1.41, "learning_rate": 1.1404316400150288e-05, "loss": 0.4546, "step": 2190 }, { "epoch": 1.41, "learning_rate": 1.1397410062969833e-05, "loss": 0.4144, "step": 2191 }, { "epoch": 1.42, "learning_rate": 1.1390503045917892e-05, "loss": 0.4325, "step": 2192 }, { "epoch": 1.42, "learning_rate": 1.1383595352354874e-05, "loss": 0.4419, "step": 2193 }, { "epoch": 1.42, "learning_rate": 1.1376686985641536e-05, "loss": 0.4277, "step": 2194 }, { "epoch": 1.42, "learning_rate": 1.1369777949138948e-05, "loss": 0.432, "step": 2195 }, { "epoch": 1.42, "learning_rate": 1.1362868246208519e-05, "loss": 0.4255, "step": 2196 }, { "epoch": 1.42, "learning_rate": 1.1355957880211968e-05, "loss": 0.4191, "step": 2197 }, { "epoch": 1.42, "learning_rate": 1.1349046854511347e-05, "loss": 0.4292, "step": 2198 }, { "epoch": 1.42, "learning_rate": 1.1342135172469024e-05, "loss": 0.4237, "step": 2199 }, { "epoch": 1.42, "learning_rate": 1.1335222837447692e-05, "loss": 0.4277, "step": 2200 }, { "epoch": 1.42, "learning_rate": 1.1328309852810357e-05, "loss": 0.4359, "step": 2201 }, { "epoch": 1.42, "learning_rate": 1.132139622192034e-05, "loss": 0.4327, "step": 2202 }, { "epoch": 1.42, "learning_rate": 1.1314481948141277e-05, "loss": 0.4337, "step": 2203 }, { "epoch": 1.42, "learning_rate": 1.1307567034837123e-05, "loss": 0.4173, "step": 2204 }, { "epoch": 1.42, "learning_rate": 1.1300651485372133e-05, "loss": 0.4298, "step": 2205 }, { "epoch": 1.42, "learning_rate": 1.129373530311088e-05, "loss": 0.3972, "step": 2206 }, { "epoch": 1.42, "learning_rate": 1.1286818491418246e-05, "loss": 0.4238, "step": 2207 }, { "epoch": 1.43, "learning_rate": 1.127990105365941e-05, "loss": 0.4247, "step": 2208 }, { "epoch": 1.43, "learning_rate": 1.1272982993199868e-05, "loss": 0.42, "step": 2209 }, { "epoch": 1.43, "learning_rate": 1.1266064313405404e-05, "loss": 0.4307, "step": 2210 }, { "epoch": 1.43, "learning_rate": 1.125914501764212e-05, "loss": 0.4442, "step": 2211 }, { "epoch": 1.43, "learning_rate": 1.1252225109276404e-05, "loss": 0.4535, "step": 2212 }, { "epoch": 1.43, "learning_rate": 1.124530459167495e-05, "loss": 0.4112, "step": 2213 }, { "epoch": 1.43, "learning_rate": 1.1238383468204744e-05, "loss": 0.4447, "step": 2214 }, { "epoch": 1.43, "learning_rate": 1.1231461742233068e-05, "loss": 0.4326, "step": 2215 }, { "epoch": 1.43, "learning_rate": 1.12245394171275e-05, "loss": 0.4229, "step": 2216 }, { "epoch": 1.43, "learning_rate": 1.1217616496255907e-05, "loss": 0.4473, "step": 2217 }, { "epoch": 1.43, "learning_rate": 1.1210692982986447e-05, "loss": 0.4137, "step": 2218 }, { "epoch": 1.43, "learning_rate": 1.1203768880687562e-05, "loss": 0.43, "step": 2219 }, { "epoch": 1.43, "learning_rate": 1.1196844192727984e-05, "loss": 0.4262, "step": 2220 }, { "epoch": 1.43, "learning_rate": 1.1189918922476734e-05, "loss": 0.4383, "step": 2221 }, { "epoch": 1.43, "learning_rate": 1.1182993073303107e-05, "loss": 0.4052, "step": 2222 }, { "epoch": 1.44, "learning_rate": 1.1176066648576687e-05, "loss": 0.4396, "step": 2223 }, { "epoch": 1.44, "learning_rate": 1.1169139651667334e-05, "loss": 0.4297, "step": 2224 }, { "epoch": 1.44, "learning_rate": 1.1162212085945192e-05, "loss": 0.4511, "step": 2225 }, { "epoch": 1.44, "learning_rate": 1.1155283954780676e-05, "loss": 0.4364, "step": 2226 }, { "epoch": 1.44, "learning_rate": 1.1148355261544477e-05, "loss": 0.4333, "step": 2227 }, { "epoch": 1.44, "learning_rate": 1.1141426009607562e-05, "loss": 0.4391, "step": 2228 }, { "epoch": 1.44, "learning_rate": 1.1134496202341166e-05, "loss": 0.4312, "step": 2229 }, { "epoch": 1.44, "learning_rate": 1.1127565843116798e-05, "loss": 0.4129, "step": 2230 }, { "epoch": 1.44, "learning_rate": 1.1120634935306233e-05, "loss": 0.4435, "step": 2231 }, { "epoch": 1.44, "learning_rate": 1.1113703482281515e-05, "loss": 0.4212, "step": 2232 }, { "epoch": 1.44, "learning_rate": 1.1106771487414951e-05, "loss": 0.4344, "step": 2233 }, { "epoch": 1.44, "learning_rate": 1.1099838954079117e-05, "loss": 0.4174, "step": 2234 }, { "epoch": 1.44, "learning_rate": 1.1092905885646839e-05, "loss": 0.4095, "step": 2235 }, { "epoch": 1.44, "learning_rate": 1.1085972285491213e-05, "loss": 0.4349, "step": 2236 }, { "epoch": 1.44, "learning_rate": 1.1079038156985599e-05, "loss": 0.4342, "step": 2237 }, { "epoch": 1.44, "learning_rate": 1.1072103503503599e-05, "loss": 0.43, "step": 2238 }, { "epoch": 1.45, "learning_rate": 1.1065168328419083e-05, "loss": 0.4189, "step": 2239 }, { "epoch": 1.45, "learning_rate": 1.1058232635106167e-05, "loss": 0.4195, "step": 2240 }, { "epoch": 1.45, "learning_rate": 1.1051296426939221e-05, "loss": 0.4066, "step": 2241 }, { "epoch": 1.45, "learning_rate": 1.104435970729287e-05, "loss": 0.4174, "step": 2242 }, { "epoch": 1.45, "learning_rate": 1.1037422479541986e-05, "loss": 0.4261, "step": 2243 }, { "epoch": 1.45, "learning_rate": 1.103048474706168e-05, "loss": 0.4136, "step": 2244 }, { "epoch": 1.45, "learning_rate": 1.1023546513227322e-05, "loss": 0.4238, "step": 2245 }, { "epoch": 1.45, "learning_rate": 1.1016607781414514e-05, "loss": 0.4252, "step": 2246 }, { "epoch": 1.45, "learning_rate": 1.1009668554999109e-05, "loss": 0.4352, "step": 2247 }, { "epoch": 1.45, "learning_rate": 1.1002728837357192e-05, "loss": 0.4289, "step": 2248 }, { "epoch": 1.45, "learning_rate": 1.0995788631865099e-05, "loss": 0.4431, "step": 2249 }, { "epoch": 1.45, "learning_rate": 1.098884794189939e-05, "loss": 0.4489, "step": 2250 }, { "epoch": 1.45, "learning_rate": 1.0981906770836872e-05, "loss": 0.416, "step": 2251 }, { "epoch": 1.45, "learning_rate": 1.097496512205458e-05, "loss": 0.4389, "step": 2252 }, { "epoch": 1.45, "learning_rate": 1.096802299892978e-05, "loss": 0.4525, "step": 2253 }, { "epoch": 1.46, "learning_rate": 1.0961080404839974e-05, "loss": 0.4428, "step": 2254 }, { "epoch": 1.46, "learning_rate": 1.0954137343162892e-05, "loss": 0.4387, "step": 2255 }, { "epoch": 1.46, "learning_rate": 1.0947193817276485e-05, "loss": 0.4506, "step": 2256 }, { "epoch": 1.46, "learning_rate": 1.0940249830558937e-05, "loss": 0.4141, "step": 2257 }, { "epoch": 1.46, "learning_rate": 1.0933305386388656e-05, "loss": 0.4101, "step": 2258 }, { "epoch": 1.46, "learning_rate": 1.0926360488144269e-05, "loss": 0.4423, "step": 2259 }, { "epoch": 1.46, "learning_rate": 1.0919415139204625e-05, "loss": 0.423, "step": 2260 }, { "epoch": 1.46, "learning_rate": 1.0912469342948793e-05, "loss": 0.4375, "step": 2261 }, { "epoch": 1.46, "learning_rate": 1.0905523102756061e-05, "loss": 0.4355, "step": 2262 }, { "epoch": 1.46, "learning_rate": 1.0898576422005931e-05, "loss": 0.4253, "step": 2263 }, { "epoch": 1.46, "learning_rate": 1.089162930407812e-05, "loss": 0.4319, "step": 2264 }, { "epoch": 1.46, "learning_rate": 1.0884681752352555e-05, "loss": 0.4516, "step": 2265 }, { "epoch": 1.46, "learning_rate": 1.087773377020938e-05, "loss": 0.4629, "step": 2266 }, { "epoch": 1.46, "learning_rate": 1.0870785361028939e-05, "loss": 0.4308, "step": 2267 }, { "epoch": 1.46, "learning_rate": 1.0863836528191795e-05, "loss": 0.4223, "step": 2268 }, { "epoch": 1.46, "learning_rate": 1.0856887275078712e-05, "loss": 0.4195, "step": 2269 }, { "epoch": 1.47, "learning_rate": 1.0849937605070658e-05, "loss": 0.4207, "step": 2270 }, { "epoch": 1.47, "learning_rate": 1.08429875215488e-05, "loss": 0.4168, "step": 2271 }, { "epoch": 1.47, "learning_rate": 1.0836037027894515e-05, "loss": 0.4262, "step": 2272 }, { "epoch": 1.47, "learning_rate": 1.0829086127489373e-05, "loss": 0.4466, "step": 2273 }, { "epoch": 1.47, "learning_rate": 1.0822134823715139e-05, "loss": 0.4293, "step": 2274 }, { "epoch": 1.47, "learning_rate": 1.0815183119953787e-05, "loss": 0.4344, "step": 2275 }, { "epoch": 1.47, "learning_rate": 1.0808231019587472e-05, "loss": 0.4166, "step": 2276 }, { "epoch": 1.47, "learning_rate": 1.080127852599855e-05, "loss": 0.4125, "step": 2277 }, { "epoch": 1.47, "learning_rate": 1.079432564256957e-05, "loss": 0.4259, "step": 2278 }, { "epoch": 1.47, "learning_rate": 1.0787372372683258e-05, "loss": 0.4281, "step": 2279 }, { "epoch": 1.47, "learning_rate": 1.0780418719722544e-05, "loss": 0.4286, "step": 2280 }, { "epoch": 1.47, "learning_rate": 1.0773464687070529e-05, "loss": 0.4163, "step": 2281 }, { "epoch": 1.47, "learning_rate": 1.0766510278110514e-05, "loss": 0.4269, "step": 2282 }, { "epoch": 1.47, "learning_rate": 1.0759555496225973e-05, "loss": 0.4179, "step": 2283 }, { "epoch": 1.47, "learning_rate": 1.075260034480056e-05, "loss": 0.4658, "step": 2284 }, { "epoch": 1.48, "learning_rate": 1.0745644827218119e-05, "loss": 0.4263, "step": 2285 }, { "epoch": 1.48, "learning_rate": 1.0738688946862661e-05, "loss": 0.423, "step": 2286 }, { "epoch": 1.48, "learning_rate": 1.0731732707118385e-05, "loss": 0.4318, "step": 2287 }, { "epoch": 1.48, "learning_rate": 1.0724776111369654e-05, "loss": 0.4226, "step": 2288 }, { "epoch": 1.48, "learning_rate": 1.071781916300101e-05, "loss": 0.4424, "step": 2289 }, { "epoch": 1.48, "learning_rate": 1.0710861865397166e-05, "loss": 0.4371, "step": 2290 }, { "epoch": 1.48, "learning_rate": 1.0703904221943e-05, "loss": 0.4699, "step": 2291 }, { "epoch": 1.48, "learning_rate": 1.0696946236023566e-05, "loss": 0.4236, "step": 2292 }, { "epoch": 1.48, "learning_rate": 1.068998791102408e-05, "loss": 0.3996, "step": 2293 }, { "epoch": 1.48, "learning_rate": 1.0683029250329924e-05, "loss": 0.4376, "step": 2294 }, { "epoch": 1.48, "learning_rate": 1.0676070257326643e-05, "loss": 0.4494, "step": 2295 }, { "epoch": 1.48, "learning_rate": 1.0669110935399944e-05, "loss": 0.4223, "step": 2296 }, { "epoch": 1.48, "learning_rate": 1.0662151287935696e-05, "loss": 0.4123, "step": 2297 }, { "epoch": 1.48, "learning_rate": 1.0655191318319921e-05, "loss": 0.4299, "step": 2298 }, { "epoch": 1.48, "learning_rate": 1.06482310299388e-05, "loss": 0.4544, "step": 2299 }, { "epoch": 1.48, "learning_rate": 1.0641270426178677e-05, "loss": 0.4538, "step": 2300 }, { "epoch": 1.49, "learning_rate": 1.0634309510426037e-05, "loss": 0.4344, "step": 2301 }, { "epoch": 1.49, "learning_rate": 1.0627348286067521e-05, "loss": 0.4537, "step": 2302 }, { "epoch": 1.49, "learning_rate": 1.0620386756489927e-05, "loss": 0.4405, "step": 2303 }, { "epoch": 1.49, "learning_rate": 1.0613424925080194e-05, "loss": 0.4199, "step": 2304 }, { "epoch": 1.49, "learning_rate": 1.0606462795225409e-05, "loss": 0.4343, "step": 2305 }, { "epoch": 1.49, "learning_rate": 1.0599500370312805e-05, "loss": 0.4314, "step": 2306 }, { "epoch": 1.49, "learning_rate": 1.059253765372976e-05, "loss": 0.4158, "step": 2307 }, { "epoch": 1.49, "learning_rate": 1.058557464886379e-05, "loss": 0.4466, "step": 2308 }, { "epoch": 1.49, "learning_rate": 1.0578611359102555e-05, "loss": 0.4236, "step": 2309 }, { "epoch": 1.49, "learning_rate": 1.0571647787833853e-05, "loss": 0.42, "step": 2310 }, { "epoch": 1.49, "learning_rate": 1.0564683938445616e-05, "loss": 0.43, "step": 2311 }, { "epoch": 1.49, "learning_rate": 1.055771981432592e-05, "loss": 0.4326, "step": 2312 }, { "epoch": 1.49, "learning_rate": 1.0550755418862962e-05, "loss": 0.4192, "step": 2313 }, { "epoch": 1.49, "learning_rate": 1.054379075544508e-05, "loss": 0.4059, "step": 2314 }, { "epoch": 1.49, "learning_rate": 1.0536825827460739e-05, "loss": 0.437, "step": 2315 }, { "epoch": 1.5, "learning_rate": 1.0529860638298535e-05, "loss": 0.4249, "step": 2316 }, { "epoch": 1.5, "learning_rate": 1.0522895191347186e-05, "loss": 0.4035, "step": 2317 }, { "epoch": 1.5, "learning_rate": 1.0515929489995544e-05, "loss": 0.4231, "step": 2318 }, { "epoch": 1.5, "learning_rate": 1.0508963537632573e-05, "loss": 0.4082, "step": 2319 }, { "epoch": 1.5, "learning_rate": 1.0501997337647372e-05, "loss": 0.4255, "step": 2320 }, { "epoch": 1.5, "learning_rate": 1.0495030893429152e-05, "loss": 0.4043, "step": 2321 }, { "epoch": 1.5, "learning_rate": 1.0488064208367246e-05, "loss": 0.4268, "step": 2322 }, { "epoch": 1.5, "learning_rate": 1.0481097285851101e-05, "loss": 0.423, "step": 2323 }, { "epoch": 1.5, "learning_rate": 1.0474130129270281e-05, "loss": 0.4212, "step": 2324 }, { "epoch": 1.5, "learning_rate": 1.046716274201447e-05, "loss": 0.4388, "step": 2325 }, { "epoch": 1.5, "learning_rate": 1.0460195127473456e-05, "loss": 0.4583, "step": 2326 }, { "epoch": 1.5, "learning_rate": 1.045322728903714e-05, "loss": 0.4313, "step": 2327 }, { "epoch": 1.5, "learning_rate": 1.0446259230095531e-05, "loss": 0.433, "step": 2328 }, { "epoch": 1.5, "learning_rate": 1.043929095403875e-05, "loss": 0.4292, "step": 2329 }, { "epoch": 1.5, "learning_rate": 1.0432322464257019e-05, "loss": 0.4373, "step": 2330 }, { "epoch": 1.5, "learning_rate": 1.0425353764140665e-05, "loss": 0.4321, "step": 2331 }, { "epoch": 1.51, "learning_rate": 1.0418384857080118e-05, "loss": 0.3905, "step": 2332 }, { "epoch": 1.51, "learning_rate": 1.0411415746465907e-05, "loss": 0.4367, "step": 2333 }, { "epoch": 1.51, "learning_rate": 1.0404446435688665e-05, "loss": 0.4155, "step": 2334 }, { "epoch": 1.51, "learning_rate": 1.0397476928139113e-05, "loss": 0.4448, "step": 2335 }, { "epoch": 1.51, "learning_rate": 1.039050722720808e-05, "loss": 0.419, "step": 2336 }, { "epoch": 1.51, "learning_rate": 1.0383537336286476e-05, "loss": 0.4331, "step": 2337 }, { "epoch": 1.51, "learning_rate": 1.0376567258765316e-05, "loss": 0.4344, "step": 2338 }, { "epoch": 1.51, "learning_rate": 1.0369596998035698e-05, "loss": 0.43, "step": 2339 }, { "epoch": 1.51, "learning_rate": 1.0362626557488811e-05, "loss": 0.4255, "step": 2340 }, { "epoch": 1.51, "learning_rate": 1.035565594051593e-05, "loss": 0.426, "step": 2341 }, { "epoch": 1.51, "learning_rate": 1.0348685150508417e-05, "loss": 0.4458, "step": 2342 }, { "epoch": 1.51, "learning_rate": 1.0341714190857719e-05, "loss": 0.4432, "step": 2343 }, { "epoch": 1.51, "learning_rate": 1.0334743064955367e-05, "loss": 0.4096, "step": 2344 }, { "epoch": 1.51, "learning_rate": 1.0327771776192968e-05, "loss": 0.3971, "step": 2345 }, { "epoch": 1.51, "learning_rate": 1.0320800327962212e-05, "loss": 0.4154, "step": 2346 }, { "epoch": 1.52, "learning_rate": 1.0313828723654868e-05, "loss": 0.4308, "step": 2347 }, { "epoch": 1.52, "learning_rate": 1.0306856966662776e-05, "loss": 0.4024, "step": 2348 }, { "epoch": 1.52, "learning_rate": 1.029988506037785e-05, "loss": 0.4361, "step": 2349 }, { "epoch": 1.52, "learning_rate": 1.0292913008192088e-05, "loss": 0.401, "step": 2350 }, { "epoch": 1.52, "learning_rate": 1.0285940813497544e-05, "loss": 0.4204, "step": 2351 }, { "epoch": 1.52, "learning_rate": 1.027896847968635e-05, "loss": 0.414, "step": 2352 }, { "epoch": 1.52, "learning_rate": 1.0271996010150704e-05, "loss": 0.4312, "step": 2353 }, { "epoch": 1.52, "learning_rate": 1.0265023408282866e-05, "loss": 0.4215, "step": 2354 }, { "epoch": 1.52, "learning_rate": 1.0258050677475167e-05, "loss": 0.4141, "step": 2355 }, { "epoch": 1.52, "learning_rate": 1.0251077821119998e-05, "loss": 0.3989, "step": 2356 }, { "epoch": 1.52, "learning_rate": 1.0244104842609812e-05, "loss": 0.4123, "step": 2357 }, { "epoch": 1.52, "learning_rate": 1.0237131745337117e-05, "loss": 0.4386, "step": 2358 }, { "epoch": 1.52, "learning_rate": 1.0230158532694483e-05, "loss": 0.4149, "step": 2359 }, { "epoch": 1.52, "learning_rate": 1.0223185208074538e-05, "loss": 0.4236, "step": 2360 }, { "epoch": 1.52, "learning_rate": 1.0216211774869952e-05, "loss": 0.4166, "step": 2361 }, { "epoch": 1.52, "learning_rate": 1.0209238236473472e-05, "loss": 0.4121, "step": 2362 }, { "epoch": 1.53, "learning_rate": 1.0202264596277866e-05, "loss": 0.4145, "step": 2363 }, { "epoch": 1.53, "learning_rate": 1.0195290857675982e-05, "loss": 0.4245, "step": 2364 }, { "epoch": 1.53, "learning_rate": 1.0188317024060693e-05, "loss": 0.4348, "step": 2365 }, { "epoch": 1.53, "learning_rate": 1.0181343098824928e-05, "loss": 0.431, "step": 2366 }, { "epoch": 1.53, "learning_rate": 1.0174369085361655e-05, "loss": 0.4241, "step": 2367 }, { "epoch": 1.53, "learning_rate": 1.0167394987063894e-05, "loss": 0.4166, "step": 2368 }, { "epoch": 1.53, "learning_rate": 1.01604208073247e-05, "loss": 0.4305, "step": 2369 }, { "epoch": 1.53, "learning_rate": 1.0153446549537164e-05, "loss": 0.4066, "step": 2370 }, { "epoch": 1.53, "learning_rate": 1.0146472217094428e-05, "loss": 0.4231, "step": 2371 }, { "epoch": 1.53, "learning_rate": 1.0139497813389654e-05, "loss": 0.4196, "step": 2372 }, { "epoch": 1.53, "learning_rate": 1.0132523341816053e-05, "loss": 0.4189, "step": 2373 }, { "epoch": 1.53, "learning_rate": 1.0125548805766852e-05, "loss": 0.4462, "step": 2374 }, { "epoch": 1.53, "learning_rate": 1.0118574208635332e-05, "loss": 0.4115, "step": 2375 }, { "epoch": 1.53, "learning_rate": 1.0111599553814788e-05, "loss": 0.4124, "step": 2376 }, { "epoch": 1.53, "learning_rate": 1.0104624844698545e-05, "loss": 0.4266, "step": 2377 }, { "epoch": 1.54, "learning_rate": 1.0097650084679957e-05, "loss": 0.4168, "step": 2378 }, { "epoch": 1.54, "learning_rate": 1.0090675277152404e-05, "loss": 0.4301, "step": 2379 }, { "epoch": 1.54, "learning_rate": 1.008370042550928e-05, "loss": 0.4086, "step": 2380 }, { "epoch": 1.54, "learning_rate": 1.0076725533144016e-05, "loss": 0.4281, "step": 2381 }, { "epoch": 1.54, "learning_rate": 1.006975060345005e-05, "loss": 0.4426, "step": 2382 }, { "epoch": 1.54, "learning_rate": 1.0062775639820843e-05, "loss": 0.4382, "step": 2383 }, { "epoch": 1.54, "learning_rate": 1.0055800645649874e-05, "loss": 0.444, "step": 2384 }, { "epoch": 1.54, "learning_rate": 1.0048825624330631e-05, "loss": 0.4347, "step": 2385 }, { "epoch": 1.54, "learning_rate": 1.0041850579256623e-05, "loss": 0.4045, "step": 2386 }, { "epoch": 1.54, "learning_rate": 1.0034875513821363e-05, "loss": 0.4212, "step": 2387 }, { "epoch": 1.54, "learning_rate": 1.002790043141838e-05, "loss": 0.4197, "step": 2388 }, { "epoch": 1.54, "learning_rate": 1.002092533544121e-05, "loss": 0.4275, "step": 2389 }, { "epoch": 1.54, "learning_rate": 1.0013950229283392e-05, "loss": 0.4219, "step": 2390 }, { "epoch": 1.54, "learning_rate": 1.000697511633847e-05, "loss": 0.4227, "step": 2391 }, { "epoch": 1.54, "learning_rate": 1e-05, "loss": 0.4318, "step": 2392 }, { "epoch": 1.54, "learning_rate": 9.993024883661528e-06, "loss": 0.4417, "step": 2393 }, { "epoch": 1.55, "learning_rate": 9.986049770716613e-06, "loss": 0.396, "step": 2394 }, { "epoch": 1.55, "learning_rate": 9.979074664558792e-06, "loss": 0.4404, "step": 2395 }, { "epoch": 1.55, "learning_rate": 9.972099568581621e-06, "loss": 0.4353, "step": 2396 }, { "epoch": 1.55, "learning_rate": 9.96512448617864e-06, "loss": 0.4176, "step": 2397 }, { "epoch": 1.55, "learning_rate": 9.958149420743382e-06, "loss": 0.4157, "step": 2398 }, { "epoch": 1.55, "learning_rate": 9.95117437566937e-06, "loss": 0.4129, "step": 2399 }, { "epoch": 1.55, "learning_rate": 9.94419935435013e-06, "loss": 0.419, "step": 2400 }, { "epoch": 1.55, "learning_rate": 9.937224360179159e-06, "loss": 0.4222, "step": 2401 }, { "epoch": 1.55, "learning_rate": 9.930249396549954e-06, "loss": 0.4278, "step": 2402 }, { "epoch": 1.55, "learning_rate": 9.923274466855988e-06, "loss": 0.429, "step": 2403 }, { "epoch": 1.55, "learning_rate": 9.916299574490722e-06, "loss": 0.4392, "step": 2404 }, { "epoch": 1.55, "learning_rate": 9.9093247228476e-06, "loss": 0.4563, "step": 2405 }, { "epoch": 1.55, "learning_rate": 9.902349915320045e-06, "loss": 0.4321, "step": 2406 }, { "epoch": 1.55, "learning_rate": 9.895375155301456e-06, "loss": 0.4357, "step": 2407 }, { "epoch": 1.55, "learning_rate": 9.888400446185212e-06, "loss": 0.4369, "step": 2408 }, { "epoch": 1.56, "learning_rate": 9.88142579136467e-06, "loss": 0.4253, "step": 2409 }, { "epoch": 1.56, "learning_rate": 9.874451194233148e-06, "loss": 0.4395, "step": 2410 }, { "epoch": 1.56, "learning_rate": 9.867476658183954e-06, "loss": 0.4332, "step": 2411 }, { "epoch": 1.56, "learning_rate": 9.860502186610349e-06, "loss": 0.4463, "step": 2412 }, { "epoch": 1.56, "learning_rate": 9.853527782905576e-06, "loss": 0.4065, "step": 2413 }, { "epoch": 1.56, "learning_rate": 9.846553450462838e-06, "loss": 0.4378, "step": 2414 }, { "epoch": 1.56, "learning_rate": 9.839579192675305e-06, "loss": 0.4235, "step": 2415 }, { "epoch": 1.56, "learning_rate": 9.832605012936107e-06, "loss": 0.422, "step": 2416 }, { "epoch": 1.56, "learning_rate": 9.825630914638348e-06, "loss": 0.4322, "step": 2417 }, { "epoch": 1.56, "learning_rate": 9.818656901175076e-06, "loss": 0.4271, "step": 2418 }, { "epoch": 1.56, "learning_rate": 9.811682975939309e-06, "loss": 0.437, "step": 2419 }, { "epoch": 1.56, "learning_rate": 9.80470914232402e-06, "loss": 0.4277, "step": 2420 }, { "epoch": 1.56, "learning_rate": 9.797735403722132e-06, "loss": 0.431, "step": 2421 }, { "epoch": 1.56, "learning_rate": 9.790761763526533e-06, "loss": 0.4128, "step": 2422 }, { "epoch": 1.56, "learning_rate": 9.78378822513005e-06, "loss": 0.4051, "step": 2423 }, { "epoch": 1.57, "learning_rate": 9.77681479192547e-06, "loss": 0.4185, "step": 2424 }, { "epoch": 1.57, "learning_rate": 9.76984146730552e-06, "loss": 0.4296, "step": 2425 }, { "epoch": 1.57, "learning_rate": 9.762868254662888e-06, "loss": 0.4174, "step": 2426 }, { "epoch": 1.57, "learning_rate": 9.755895157390191e-06, "loss": 0.4171, "step": 2427 }, { "epoch": 1.57, "learning_rate": 9.748922178880005e-06, "loss": 0.4578, "step": 2428 }, { "epoch": 1.57, "learning_rate": 9.741949322524835e-06, "loss": 0.4476, "step": 2429 }, { "epoch": 1.57, "learning_rate": 9.734976591717136e-06, "loss": 0.4423, "step": 2430 }, { "epoch": 1.57, "learning_rate": 9.7280039898493e-06, "loss": 0.4181, "step": 2431 }, { "epoch": 1.57, "learning_rate": 9.721031520313653e-06, "loss": 0.4439, "step": 2432 }, { "epoch": 1.57, "learning_rate": 9.714059186502459e-06, "loss": 0.4522, "step": 2433 }, { "epoch": 1.57, "learning_rate": 9.707086991807912e-06, "loss": 0.4501, "step": 2434 }, { "epoch": 1.57, "learning_rate": 9.700114939622151e-06, "loss": 0.4533, "step": 2435 }, { "epoch": 1.57, "learning_rate": 9.693143033337228e-06, "loss": 0.4148, "step": 2436 }, { "epoch": 1.57, "learning_rate": 9.686171276345137e-06, "loss": 0.4632, "step": 2437 }, { "epoch": 1.57, "learning_rate": 9.679199672037791e-06, "loss": 0.4252, "step": 2438 }, { "epoch": 1.57, "learning_rate": 9.672228223807037e-06, "loss": 0.4358, "step": 2439 }, { "epoch": 1.58, "learning_rate": 9.665256935044636e-06, "loss": 0.4292, "step": 2440 }, { "epoch": 1.58, "learning_rate": 9.658285809142285e-06, "loss": 0.4117, "step": 2441 }, { "epoch": 1.58, "learning_rate": 9.651314849491586e-06, "loss": 0.4107, "step": 2442 }, { "epoch": 1.58, "learning_rate": 9.644344059484072e-06, "loss": 0.4623, "step": 2443 }, { "epoch": 1.58, "learning_rate": 9.637373442511192e-06, "loss": 0.4317, "step": 2444 }, { "epoch": 1.58, "learning_rate": 9.630403001964302e-06, "loss": 0.4463, "step": 2445 }, { "epoch": 1.58, "learning_rate": 9.623432741234686e-06, "loss": 0.4142, "step": 2446 }, { "epoch": 1.58, "learning_rate": 9.616462663713524e-06, "loss": 0.4252, "step": 2447 }, { "epoch": 1.58, "learning_rate": 9.609492772791924e-06, "loss": 0.4176, "step": 2448 }, { "epoch": 1.58, "learning_rate": 9.602523071860889e-06, "loss": 0.4322, "step": 2449 }, { "epoch": 1.58, "learning_rate": 9.59555356431134e-06, "loss": 0.4304, "step": 2450 }, { "epoch": 1.58, "learning_rate": 9.588584253534096e-06, "loss": 0.448, "step": 2451 }, { "epoch": 1.58, "learning_rate": 9.581615142919887e-06, "loss": 0.4312, "step": 2452 }, { "epoch": 1.58, "learning_rate": 9.574646235859339e-06, "loss": 0.4287, "step": 2453 }, { "epoch": 1.58, "learning_rate": 9.567677535742988e-06, "loss": 0.4245, "step": 2454 }, { "epoch": 1.59, "learning_rate": 9.560709045961254e-06, "loss": 0.4146, "step": 2455 }, { "epoch": 1.59, "learning_rate": 9.55374076990447e-06, "loss": 0.4217, "step": 2456 }, { "epoch": 1.59, "learning_rate": 9.546772710962864e-06, "loss": 0.4246, "step": 2457 }, { "epoch": 1.59, "learning_rate": 9.539804872526547e-06, "loss": 0.428, "step": 2458 }, { "epoch": 1.59, "learning_rate": 9.532837257985533e-06, "loss": 0.421, "step": 2459 }, { "epoch": 1.59, "learning_rate": 9.525869870729719e-06, "loss": 0.4298, "step": 2460 }, { "epoch": 1.59, "learning_rate": 9.518902714148904e-06, "loss": 0.4235, "step": 2461 }, { "epoch": 1.59, "learning_rate": 9.511935791632758e-06, "loss": 0.4447, "step": 2462 }, { "epoch": 1.59, "learning_rate": 9.504969106570853e-06, "loss": 0.4272, "step": 2463 }, { "epoch": 1.59, "learning_rate": 9.49800266235263e-06, "loss": 0.4469, "step": 2464 }, { "epoch": 1.59, "learning_rate": 9.491036462367428e-06, "loss": 0.42, "step": 2465 }, { "epoch": 1.59, "learning_rate": 9.484070510004459e-06, "loss": 0.4263, "step": 2466 }, { "epoch": 1.59, "learning_rate": 9.477104808652818e-06, "loss": 0.4177, "step": 2467 }, { "epoch": 1.59, "learning_rate": 9.470139361701469e-06, "loss": 0.4379, "step": 2468 }, { "epoch": 1.59, "learning_rate": 9.463174172539261e-06, "loss": 0.4264, "step": 2469 }, { "epoch": 1.59, "learning_rate": 9.456209244554923e-06, "loss": 0.4268, "step": 2470 }, { "epoch": 1.6, "learning_rate": 9.449244581137038e-06, "loss": 0.4258, "step": 2471 }, { "epoch": 1.6, "learning_rate": 9.442280185674084e-06, "loss": 0.4472, "step": 2472 }, { "epoch": 1.6, "learning_rate": 9.435316061554384e-06, "loss": 0.4356, "step": 2473 }, { "epoch": 1.6, "learning_rate": 9.428352212166149e-06, "loss": 0.398, "step": 2474 }, { "epoch": 1.6, "learning_rate": 9.421388640897447e-06, "loss": 0.4038, "step": 2475 }, { "epoch": 1.6, "learning_rate": 9.414425351136215e-06, "loss": 0.4012, "step": 2476 }, { "epoch": 1.6, "learning_rate": 9.407462346270242e-06, "loss": 0.4291, "step": 2477 }, { "epoch": 1.6, "learning_rate": 9.4004996296872e-06, "loss": 0.4123, "step": 2478 }, { "epoch": 1.6, "learning_rate": 9.393537204774594e-06, "loss": 0.4316, "step": 2479 }, { "epoch": 1.6, "learning_rate": 9.386575074919806e-06, "loss": 0.476, "step": 2480 }, { "epoch": 1.6, "learning_rate": 9.379613243510075e-06, "loss": 0.4505, "step": 2481 }, { "epoch": 1.6, "learning_rate": 9.372651713932479e-06, "loss": 0.4498, "step": 2482 }, { "epoch": 1.6, "learning_rate": 9.365690489573966e-06, "loss": 0.4354, "step": 2483 }, { "epoch": 1.6, "learning_rate": 9.358729573821325e-06, "loss": 0.4259, "step": 2484 }, { "epoch": 1.6, "learning_rate": 9.351768970061201e-06, "loss": 0.409, "step": 2485 }, { "epoch": 1.61, "learning_rate": 9.344808681680082e-06, "loss": 0.4221, "step": 2486 }, { "epoch": 1.61, "learning_rate": 9.337848712064309e-06, "loss": 0.4508, "step": 2487 }, { "epoch": 1.61, "learning_rate": 9.330889064600058e-06, "loss": 0.431, "step": 2488 }, { "epoch": 1.61, "learning_rate": 9.323929742673362e-06, "loss": 0.4373, "step": 2489 }, { "epoch": 1.61, "learning_rate": 9.31697074967008e-06, "loss": 0.4361, "step": 2490 }, { "epoch": 1.61, "learning_rate": 9.310012088975924e-06, "loss": 0.4206, "step": 2491 }, { "epoch": 1.61, "learning_rate": 9.303053763976436e-06, "loss": 0.4234, "step": 2492 }, { "epoch": 1.61, "learning_rate": 9.296095778057001e-06, "loss": 0.3996, "step": 2493 }, { "epoch": 1.61, "learning_rate": 9.289138134602837e-06, "loss": 0.4353, "step": 2494 }, { "epoch": 1.61, "learning_rate": 9.28218083699899e-06, "loss": 0.4154, "step": 2495 }, { "epoch": 1.61, "learning_rate": 9.275223888630348e-06, "loss": 0.4216, "step": 2496 }, { "epoch": 1.61, "learning_rate": 9.268267292881615e-06, "loss": 0.4198, "step": 2497 }, { "epoch": 1.61, "learning_rate": 9.26131105313734e-06, "loss": 0.4337, "step": 2498 }, { "epoch": 1.61, "learning_rate": 9.254355172781883e-06, "loss": 0.4181, "step": 2499 }, { "epoch": 1.61, "learning_rate": 9.247399655199444e-06, "loss": 0.4266, "step": 2500 }, { "epoch": 1.61, "learning_rate": 9.24044450377403e-06, "loss": 0.4218, "step": 2501 }, { "epoch": 1.62, "learning_rate": 9.233489721889491e-06, "loss": 0.4449, "step": 2502 }, { "epoch": 1.62, "learning_rate": 9.226535312929473e-06, "loss": 0.4076, "step": 2503 }, { "epoch": 1.62, "learning_rate": 9.219581280277463e-06, "loss": 0.4256, "step": 2504 }, { "epoch": 1.62, "learning_rate": 9.212627627316744e-06, "loss": 0.4363, "step": 2505 }, { "epoch": 1.62, "learning_rate": 9.205674357430432e-06, "loss": 0.4118, "step": 2506 }, { "epoch": 1.62, "learning_rate": 9.198721474001451e-06, "loss": 0.446, "step": 2507 }, { "epoch": 1.62, "learning_rate": 9.191768980412528e-06, "loss": 0.4254, "step": 2508 }, { "epoch": 1.62, "learning_rate": 9.184816880046217e-06, "loss": 0.4254, "step": 2509 }, { "epoch": 1.62, "learning_rate": 9.177865176284863e-06, "loss": 0.4279, "step": 2510 }, { "epoch": 1.62, "learning_rate": 9.170913872510634e-06, "loss": 0.4295, "step": 2511 }, { "epoch": 1.62, "learning_rate": 9.163962972105488e-06, "loss": 0.4459, "step": 2512 }, { "epoch": 1.62, "learning_rate": 9.157012478451205e-06, "loss": 0.4103, "step": 2513 }, { "epoch": 1.62, "learning_rate": 9.150062394929347e-06, "loss": 0.4162, "step": 2514 }, { "epoch": 1.62, "learning_rate": 9.143112724921293e-06, "loss": 0.4081, "step": 2515 }, { "epoch": 1.62, "learning_rate": 9.136163471808207e-06, "loss": 0.423, "step": 2516 }, { "epoch": 1.63, "learning_rate": 9.129214638971065e-06, "loss": 0.4219, "step": 2517 }, { "epoch": 1.63, "learning_rate": 9.122266229790624e-06, "loss": 0.4373, "step": 2518 }, { "epoch": 1.63, "learning_rate": 9.115318247647448e-06, "loss": 0.4469, "step": 2519 }, { "epoch": 1.63, "learning_rate": 9.108370695921884e-06, "loss": 0.4391, "step": 2520 }, { "epoch": 1.63, "learning_rate": 9.101423577994069e-06, "loss": 0.4593, "step": 2521 }, { "epoch": 1.63, "learning_rate": 9.09447689724394e-06, "loss": 0.4357, "step": 2522 }, { "epoch": 1.63, "learning_rate": 9.087530657051207e-06, "loss": 0.4462, "step": 2523 }, { "epoch": 1.63, "learning_rate": 9.080584860795378e-06, "loss": 0.4232, "step": 2524 }, { "epoch": 1.63, "learning_rate": 9.073639511855734e-06, "loss": 0.4231, "step": 2525 }, { "epoch": 1.63, "learning_rate": 9.066694613611346e-06, "loss": 0.4175, "step": 2526 }, { "epoch": 1.63, "learning_rate": 9.059750169441064e-06, "loss": 0.4173, "step": 2527 }, { "epoch": 1.63, "learning_rate": 9.05280618272352e-06, "loss": 0.4316, "step": 2528 }, { "epoch": 1.63, "learning_rate": 9.045862656837112e-06, "loss": 0.4212, "step": 2529 }, { "epoch": 1.63, "learning_rate": 9.038919595160026e-06, "loss": 0.4255, "step": 2530 }, { "epoch": 1.63, "learning_rate": 9.031977001070222e-06, "loss": 0.4409, "step": 2531 }, { "epoch": 1.63, "learning_rate": 9.025034877945422e-06, "loss": 0.4423, "step": 2532 }, { "epoch": 1.64, "learning_rate": 9.018093229163133e-06, "loss": 0.431, "step": 2533 }, { "epoch": 1.64, "learning_rate": 9.011152058100612e-06, "loss": 0.4217, "step": 2534 }, { "epoch": 1.64, "learning_rate": 9.004211368134906e-06, "loss": 0.4174, "step": 2535 }, { "epoch": 1.64, "learning_rate": 8.99727116264281e-06, "loss": 0.427, "step": 2536 }, { "epoch": 1.64, "learning_rate": 8.990331445000898e-06, "loss": 0.4481, "step": 2537 }, { "epoch": 1.64, "learning_rate": 8.98339221858549e-06, "loss": 0.4177, "step": 2538 }, { "epoch": 1.64, "learning_rate": 8.976453486772683e-06, "loss": 0.4379, "step": 2539 }, { "epoch": 1.64, "learning_rate": 8.969515252938323e-06, "loss": 0.4458, "step": 2540 }, { "epoch": 1.64, "learning_rate": 8.96257752045802e-06, "loss": 0.4245, "step": 2541 }, { "epoch": 1.64, "learning_rate": 8.955640292707131e-06, "loss": 0.4278, "step": 2542 }, { "epoch": 1.64, "learning_rate": 8.948703573060779e-06, "loss": 0.425, "step": 2543 }, { "epoch": 1.64, "learning_rate": 8.941767364893836e-06, "loss": 0.4328, "step": 2544 }, { "epoch": 1.64, "learning_rate": 8.934831671580919e-06, "loss": 0.4335, "step": 2545 }, { "epoch": 1.64, "learning_rate": 8.927896496496402e-06, "loss": 0.4654, "step": 2546 }, { "epoch": 1.64, "learning_rate": 8.920961843014403e-06, "loss": 0.4082, "step": 2547 }, { "epoch": 1.65, "learning_rate": 8.914027714508788e-06, "loss": 0.4559, "step": 2548 }, { "epoch": 1.65, "learning_rate": 8.907094114353165e-06, "loss": 0.4421, "step": 2549 }, { "epoch": 1.65, "learning_rate": 8.90016104592089e-06, "loss": 0.4186, "step": 2550 }, { "epoch": 1.65, "learning_rate": 8.89322851258505e-06, "loss": 0.4188, "step": 2551 }, { "epoch": 1.65, "learning_rate": 8.88629651771849e-06, "loss": 0.4166, "step": 2552 }, { "epoch": 1.65, "learning_rate": 8.879365064693769e-06, "loss": 0.4193, "step": 2553 }, { "epoch": 1.65, "learning_rate": 8.872434156883207e-06, "loss": 0.4256, "step": 2554 }, { "epoch": 1.65, "learning_rate": 8.865503797658836e-06, "loss": 0.4198, "step": 2555 }, { "epoch": 1.65, "learning_rate": 8.85857399039244e-06, "loss": 0.4363, "step": 2556 }, { "epoch": 1.65, "learning_rate": 8.851644738455526e-06, "loss": 0.4192, "step": 2557 }, { "epoch": 1.65, "learning_rate": 8.844716045219325e-06, "loss": 0.4143, "step": 2558 }, { "epoch": 1.65, "learning_rate": 8.837787914054812e-06, "loss": 0.4138, "step": 2559 }, { "epoch": 1.65, "learning_rate": 8.830860348332666e-06, "loss": 0.4127, "step": 2560 }, { "epoch": 1.65, "learning_rate": 8.823933351423316e-06, "loss": 0.422, "step": 2561 }, { "epoch": 1.65, "learning_rate": 8.817006926696894e-06, "loss": 0.4262, "step": 2562 }, { "epoch": 1.65, "learning_rate": 8.810081077523271e-06, "loss": 0.4226, "step": 2563 }, { "epoch": 1.66, "learning_rate": 8.803155807272019e-06, "loss": 0.4253, "step": 2564 }, { "epoch": 1.66, "learning_rate": 8.796231119312443e-06, "loss": 0.414, "step": 2565 }, { "epoch": 1.66, "learning_rate": 8.789307017013556e-06, "loss": 0.4272, "step": 2566 }, { "epoch": 1.66, "learning_rate": 8.782383503744096e-06, "loss": 0.4346, "step": 2567 }, { "epoch": 1.66, "learning_rate": 8.775460582872502e-06, "loss": 0.4306, "step": 2568 }, { "epoch": 1.66, "learning_rate": 8.768538257766933e-06, "loss": 0.4101, "step": 2569 }, { "epoch": 1.66, "learning_rate": 8.761616531795258e-06, "loss": 0.4125, "step": 2570 }, { "epoch": 1.66, "learning_rate": 8.754695408325052e-06, "loss": 0.4276, "step": 2571 }, { "epoch": 1.66, "learning_rate": 8.7477748907236e-06, "loss": 0.4397, "step": 2572 }, { "epoch": 1.66, "learning_rate": 8.74085498235788e-06, "loss": 0.4041, "step": 2573 }, { "epoch": 1.66, "learning_rate": 8.733935686594599e-06, "loss": 0.4144, "step": 2574 }, { "epoch": 1.66, "learning_rate": 8.727017006800136e-06, "loss": 0.4299, "step": 2575 }, { "epoch": 1.66, "learning_rate": 8.720098946340594e-06, "loss": 0.4395, "step": 2576 }, { "epoch": 1.66, "learning_rate": 8.713181508581758e-06, "loss": 0.4296, "step": 2577 }, { "epoch": 1.66, "learning_rate": 8.706264696889122e-06, "loss": 0.4375, "step": 2578 }, { "epoch": 1.67, "learning_rate": 8.699348514627869e-06, "loss": 0.4385, "step": 2579 }, { "epoch": 1.67, "learning_rate": 8.69243296516288e-06, "loss": 0.4278, "step": 2580 }, { "epoch": 1.67, "learning_rate": 8.685518051858724e-06, "loss": 0.4295, "step": 2581 }, { "epoch": 1.67, "learning_rate": 8.678603778079661e-06, "loss": 0.4201, "step": 2582 }, { "epoch": 1.67, "learning_rate": 8.671690147189647e-06, "loss": 0.4056, "step": 2583 }, { "epoch": 1.67, "learning_rate": 8.664777162552308e-06, "loss": 0.4261, "step": 2584 }, { "epoch": 1.67, "learning_rate": 8.657864827530978e-06, "loss": 0.4715, "step": 2585 }, { "epoch": 1.67, "learning_rate": 8.650953145488656e-06, "loss": 0.4093, "step": 2586 }, { "epoch": 1.67, "learning_rate": 8.644042119788037e-06, "loss": 0.4268, "step": 2587 }, { "epoch": 1.67, "learning_rate": 8.637131753791485e-06, "loss": 0.4616, "step": 2588 }, { "epoch": 1.67, "learning_rate": 8.630222050861055e-06, "loss": 0.4324, "step": 2589 }, { "epoch": 1.67, "learning_rate": 8.623313014358467e-06, "loss": 0.4424, "step": 2590 }, { "epoch": 1.67, "learning_rate": 8.61640464764513e-06, "loss": 0.4013, "step": 2591 }, { "epoch": 1.67, "learning_rate": 8.609496954082113e-06, "loss": 0.4302, "step": 2592 }, { "epoch": 1.67, "learning_rate": 8.602589937030165e-06, "loss": 0.4389, "step": 2593 }, { "epoch": 1.67, "learning_rate": 8.595683599849716e-06, "loss": 0.4108, "step": 2594 }, { "epoch": 1.68, "learning_rate": 8.58877794590084e-06, "loss": 0.4417, "step": 2595 }, { "epoch": 1.68, "learning_rate": 8.581872978543305e-06, "loss": 0.4288, "step": 2596 }, { "epoch": 1.68, "learning_rate": 8.574968701136524e-06, "loss": 0.4163, "step": 2597 }, { "epoch": 1.68, "learning_rate": 8.568065117039592e-06, "loss": 0.4052, "step": 2598 }, { "epoch": 1.68, "learning_rate": 8.56116222961125e-06, "loss": 0.4496, "step": 2599 }, { "epoch": 1.68, "learning_rate": 8.554260042209918e-06, "loss": 0.4124, "step": 2600 }, { "epoch": 1.68, "learning_rate": 8.547358558193656e-06, "loss": 0.4274, "step": 2601 }, { "epoch": 1.68, "learning_rate": 8.5404577809202e-06, "loss": 0.3958, "step": 2602 }, { "epoch": 1.68, "learning_rate": 8.533557713746924e-06, "loss": 0.4527, "step": 2603 }, { "epoch": 1.68, "learning_rate": 8.526658360030876e-06, "loss": 0.4523, "step": 2604 }, { "epoch": 1.68, "learning_rate": 8.51975972312874e-06, "loss": 0.4344, "step": 2605 }, { "epoch": 1.68, "learning_rate": 8.51286180639686e-06, "loss": 0.4558, "step": 2606 }, { "epoch": 1.68, "learning_rate": 8.505964613191233e-06, "loss": 0.4464, "step": 2607 }, { "epoch": 1.68, "learning_rate": 8.499068146867492e-06, "loss": 0.4166, "step": 2608 }, { "epoch": 1.68, "learning_rate": 8.49217241078093e-06, "loss": 0.4092, "step": 2609 }, { "epoch": 1.69, "learning_rate": 8.485277408286471e-06, "loss": 0.452, "step": 2610 }, { "epoch": 1.69, "learning_rate": 8.478383142738696e-06, "loss": 0.4318, "step": 2611 }, { "epoch": 1.69, "learning_rate": 8.471489617491813e-06, "loss": 0.4305, "step": 2612 }, { "epoch": 1.69, "learning_rate": 8.464596835899683e-06, "loss": 0.4079, "step": 2613 }, { "epoch": 1.69, "learning_rate": 8.457704801315797e-06, "loss": 0.4572, "step": 2614 }, { "epoch": 1.69, "learning_rate": 8.45081351709329e-06, "loss": 0.4227, "step": 2615 }, { "epoch": 1.69, "learning_rate": 8.44392298658492e-06, "loss": 0.4424, "step": 2616 }, { "epoch": 1.69, "learning_rate": 8.437033213143091e-06, "loss": 0.4213, "step": 2617 }, { "epoch": 1.69, "learning_rate": 8.430144200119827e-06, "loss": 0.4141, "step": 2618 }, { "epoch": 1.69, "learning_rate": 8.423255950866793e-06, "loss": 0.4224, "step": 2619 }, { "epoch": 1.69, "learning_rate": 8.41636846873528e-06, "loss": 0.4151, "step": 2620 }, { "epoch": 1.69, "learning_rate": 8.409481757076196e-06, "loss": 0.4406, "step": 2621 }, { "epoch": 1.69, "learning_rate": 8.402595819240086e-06, "loss": 0.4094, "step": 2622 }, { "epoch": 1.69, "learning_rate": 8.395710658577113e-06, "loss": 0.4336, "step": 2623 }, { "epoch": 1.69, "learning_rate": 8.388826278437066e-06, "loss": 0.4258, "step": 2624 }, { "epoch": 1.69, "learning_rate": 8.381942682169343e-06, "loss": 0.4462, "step": 2625 }, { "epoch": 1.7, "learning_rate": 8.375059873122978e-06, "loss": 0.4102, "step": 2626 }, { "epoch": 1.7, "learning_rate": 8.368177854646606e-06, "loss": 0.4088, "step": 2627 }, { "epoch": 1.7, "learning_rate": 8.361296630088488e-06, "loss": 0.4061, "step": 2628 }, { "epoch": 1.7, "learning_rate": 8.35441620279649e-06, "loss": 0.4134, "step": 2629 }, { "epoch": 1.7, "learning_rate": 8.347536576118102e-06, "loss": 0.437, "step": 2630 }, { "epoch": 1.7, "learning_rate": 8.340657753400411e-06, "loss": 0.4261, "step": 2631 }, { "epoch": 1.7, "learning_rate": 8.333779737990124e-06, "loss": 0.4257, "step": 2632 }, { "epoch": 1.7, "learning_rate": 8.326902533233551e-06, "loss": 0.4351, "step": 2633 }, { "epoch": 1.7, "learning_rate": 8.320026142476601e-06, "loss": 0.4547, "step": 2634 }, { "epoch": 1.7, "learning_rate": 8.3131505690648e-06, "loss": 0.4376, "step": 2635 }, { "epoch": 1.7, "learning_rate": 8.306275816343262e-06, "loss": 0.4236, "step": 2636 }, { "epoch": 1.7, "learning_rate": 8.29940188765672e-06, "loss": 0.4503, "step": 2637 }, { "epoch": 1.7, "learning_rate": 8.292528786349481e-06, "loss": 0.417, "step": 2638 }, { "epoch": 1.7, "learning_rate": 8.285656515765476e-06, "loss": 0.4315, "step": 2639 }, { "epoch": 1.7, "learning_rate": 8.278785079248211e-06, "loss": 0.429, "step": 2640 }, { "epoch": 1.71, "learning_rate": 8.271914480140804e-06, "loss": 0.4249, "step": 2641 }, { "epoch": 1.71, "learning_rate": 8.265044721785946e-06, "loss": 0.4223, "step": 2642 }, { "epoch": 1.71, "learning_rate": 8.258175807525935e-06, "loss": 0.4329, "step": 2643 }, { "epoch": 1.71, "learning_rate": 8.251307740702656e-06, "loss": 0.4267, "step": 2644 }, { "epoch": 1.71, "learning_rate": 8.244440524657569e-06, "loss": 0.4263, "step": 2645 }, { "epoch": 1.71, "learning_rate": 8.237574162731739e-06, "loss": 0.4381, "step": 2646 }, { "epoch": 1.71, "learning_rate": 8.230708658265798e-06, "loss": 0.4134, "step": 2647 }, { "epoch": 1.71, "learning_rate": 8.223844014599973e-06, "loss": 0.4602, "step": 2648 }, { "epoch": 1.71, "learning_rate": 8.216980235074065e-06, "loss": 0.4387, "step": 2649 }, { "epoch": 1.71, "learning_rate": 8.210117323027464e-06, "loss": 0.4314, "step": 2650 }, { "epoch": 1.71, "learning_rate": 8.203255281799122e-06, "loss": 0.4321, "step": 2651 }, { "epoch": 1.71, "learning_rate": 8.196394114727586e-06, "loss": 0.414, "step": 2652 }, { "epoch": 1.71, "learning_rate": 8.189533825150958e-06, "loss": 0.4285, "step": 2653 }, { "epoch": 1.71, "learning_rate": 8.182674416406936e-06, "loss": 0.4379, "step": 2654 }, { "epoch": 1.71, "learning_rate": 8.175815891832764e-06, "loss": 0.4186, "step": 2655 }, { "epoch": 1.71, "learning_rate": 8.168958254765275e-06, "loss": 0.4522, "step": 2656 }, { "epoch": 1.72, "learning_rate": 8.162101508540867e-06, "loss": 0.4029, "step": 2657 }, { "epoch": 1.72, "learning_rate": 8.155245656495497e-06, "loss": 0.4084, "step": 2658 }, { "epoch": 1.72, "learning_rate": 8.148390701964697e-06, "loss": 0.44, "step": 2659 }, { "epoch": 1.72, "learning_rate": 8.14153664828355e-06, "loss": 0.4398, "step": 2660 }, { "epoch": 1.72, "learning_rate": 8.134683498786713e-06, "loss": 0.4347, "step": 2661 }, { "epoch": 1.72, "learning_rate": 8.12783125680839e-06, "loss": 0.4231, "step": 2662 }, { "epoch": 1.72, "learning_rate": 8.120979925682364e-06, "loss": 0.4406, "step": 2663 }, { "epoch": 1.72, "learning_rate": 8.114129508741947e-06, "loss": 0.4278, "step": 2664 }, { "epoch": 1.72, "learning_rate": 8.107280009320034e-06, "loss": 0.4274, "step": 2665 }, { "epoch": 1.72, "learning_rate": 8.100431430749052e-06, "loss": 0.4169, "step": 2666 }, { "epoch": 1.72, "learning_rate": 8.093583776360996e-06, "loss": 0.4452, "step": 2667 }, { "epoch": 1.72, "learning_rate": 8.086737049487398e-06, "loss": 0.4317, "step": 2668 }, { "epoch": 1.72, "learning_rate": 8.079891253459345e-06, "loss": 0.44, "step": 2669 }, { "epoch": 1.72, "learning_rate": 8.07304639160748e-06, "loss": 0.4374, "step": 2670 }, { "epoch": 1.72, "learning_rate": 8.066202467261973e-06, "loss": 0.4126, "step": 2671 }, { "epoch": 1.73, "learning_rate": 8.059359483752551e-06, "loss": 0.4123, "step": 2672 }, { "epoch": 1.73, "learning_rate": 8.052517444408478e-06, "loss": 0.437, "step": 2673 }, { "epoch": 1.73, "learning_rate": 8.045676352558563e-06, "loss": 0.4201, "step": 2674 }, { "epoch": 1.73, "learning_rate": 8.038836211531147e-06, "loss": 0.4192, "step": 2675 }, { "epoch": 1.73, "learning_rate": 8.031997024654123e-06, "loss": 0.4264, "step": 2676 }, { "epoch": 1.73, "learning_rate": 8.025158795254896e-06, "loss": 0.4424, "step": 2677 }, { "epoch": 1.73, "learning_rate": 8.01832152666043e-06, "loss": 0.443, "step": 2678 }, { "epoch": 1.73, "learning_rate": 8.011485222197201e-06, "loss": 0.4233, "step": 2679 }, { "epoch": 1.73, "learning_rate": 8.00464988519123e-06, "loss": 0.4262, "step": 2680 }, { "epoch": 1.73, "learning_rate": 7.997815518968063e-06, "loss": 0.4171, "step": 2681 }, { "epoch": 1.73, "learning_rate": 7.990982126852772e-06, "loss": 0.4037, "step": 2682 }, { "epoch": 1.73, "learning_rate": 7.984149712169957e-06, "loss": 0.4185, "step": 2683 }, { "epoch": 1.73, "learning_rate": 7.977318278243742e-06, "loss": 0.4162, "step": 2684 }, { "epoch": 1.73, "learning_rate": 7.970487828397774e-06, "loss": 0.4224, "step": 2685 }, { "epoch": 1.73, "learning_rate": 7.963658365955216e-06, "loss": 0.4016, "step": 2686 }, { "epoch": 1.73, "learning_rate": 7.956829894238763e-06, "loss": 0.4309, "step": 2687 }, { "epoch": 1.74, "learning_rate": 7.950002416570614e-06, "loss": 0.4203, "step": 2688 }, { "epoch": 1.74, "learning_rate": 7.943175936272495e-06, "loss": 0.423, "step": 2689 }, { "epoch": 1.74, "learning_rate": 7.936350456665639e-06, "loss": 0.4123, "step": 2690 }, { "epoch": 1.74, "learning_rate": 7.929525981070798e-06, "loss": 0.4267, "step": 2691 }, { "epoch": 1.74, "learning_rate": 7.92270251280823e-06, "loss": 0.4135, "step": 2692 }, { "epoch": 1.74, "learning_rate": 7.91588005519771e-06, "loss": 0.4095, "step": 2693 }, { "epoch": 1.74, "learning_rate": 7.909058611558522e-06, "loss": 0.4505, "step": 2694 }, { "epoch": 1.74, "learning_rate": 7.90223818520944e-06, "loss": 0.4363, "step": 2695 }, { "epoch": 1.74, "learning_rate": 7.895418779468766e-06, "loss": 0.4273, "step": 2696 }, { "epoch": 1.74, "learning_rate": 7.888600397654285e-06, "loss": 0.4338, "step": 2697 }, { "epoch": 1.74, "learning_rate": 7.881783043083304e-06, "loss": 0.4136, "step": 2698 }, { "epoch": 1.74, "learning_rate": 7.874966719072609e-06, "loss": 0.4278, "step": 2699 }, { "epoch": 1.74, "learning_rate": 7.868151428938502e-06, "loss": 0.4416, "step": 2700 }, { "epoch": 1.74, "learning_rate": 7.861337175996772e-06, "loss": 0.4263, "step": 2701 }, { "epoch": 1.74, "learning_rate": 7.854523963562713e-06, "loss": 0.427, "step": 2702 }, { "epoch": 1.75, "learning_rate": 7.847711794951097e-06, "loss": 0.4138, "step": 2703 }, { "epoch": 1.75, "learning_rate": 7.840900673476204e-06, "loss": 0.4114, "step": 2704 }, { "epoch": 1.75, "learning_rate": 7.834090602451795e-06, "loss": 0.4046, "step": 2705 }, { "epoch": 1.75, "learning_rate": 7.827281585191123e-06, "loss": 0.4306, "step": 2706 }, { "epoch": 1.75, "learning_rate": 7.820473625006935e-06, "loss": 0.4322, "step": 2707 }, { "epoch": 1.75, "learning_rate": 7.813666725211445e-06, "loss": 0.4404, "step": 2708 }, { "epoch": 1.75, "learning_rate": 7.806860889116375e-06, "loss": 0.4284, "step": 2709 }, { "epoch": 1.75, "learning_rate": 7.800056120032908e-06, "loss": 0.4393, "step": 2710 }, { "epoch": 1.75, "learning_rate": 7.793252421271729e-06, "loss": 0.4427, "step": 2711 }, { "epoch": 1.75, "learning_rate": 7.786449796142979e-06, "loss": 0.4459, "step": 2712 }, { "epoch": 1.75, "learning_rate": 7.779648247956295e-06, "loss": 0.4123, "step": 2713 }, { "epoch": 1.75, "learning_rate": 7.77284778002078e-06, "loss": 0.41, "step": 2714 }, { "epoch": 1.75, "learning_rate": 7.76604839564502e-06, "loss": 0.4177, "step": 2715 }, { "epoch": 1.75, "learning_rate": 7.759250098137061e-06, "loss": 0.4301, "step": 2716 }, { "epoch": 1.75, "learning_rate": 7.752452890804436e-06, "loss": 0.4298, "step": 2717 }, { "epoch": 1.75, "learning_rate": 7.745656776954133e-06, "loss": 0.436, "step": 2718 }, { "epoch": 1.76, "learning_rate": 7.738861759892618e-06, "loss": 0.4182, "step": 2719 }, { "epoch": 1.76, "learning_rate": 7.732067842925823e-06, "loss": 0.4139, "step": 2720 }, { "epoch": 1.76, "learning_rate": 7.725275029359136e-06, "loss": 0.4139, "step": 2721 }, { "epoch": 1.76, "learning_rate": 7.71848332249742e-06, "loss": 0.3978, "step": 2722 }, { "epoch": 1.76, "learning_rate": 7.711692725644983e-06, "loss": 0.4288, "step": 2723 }, { "epoch": 1.76, "learning_rate": 7.704903242105616e-06, "loss": 0.4264, "step": 2724 }, { "epoch": 1.76, "learning_rate": 7.698114875182546e-06, "loss": 0.4311, "step": 2725 }, { "epoch": 1.76, "learning_rate": 7.691327628178471e-06, "loss": 0.381, "step": 2726 }, { "epoch": 1.76, "learning_rate": 7.684541504395538e-06, "loss": 0.4422, "step": 2727 }, { "epoch": 1.76, "learning_rate": 7.677756507135354e-06, "loss": 0.4327, "step": 2728 }, { "epoch": 1.76, "learning_rate": 7.670972639698963e-06, "loss": 0.4494, "step": 2729 }, { "epoch": 1.76, "learning_rate": 7.664189905386878e-06, "loss": 0.4327, "step": 2730 }, { "epoch": 1.76, "learning_rate": 7.657408307499054e-06, "loss": 0.446, "step": 2731 }, { "epoch": 1.76, "learning_rate": 7.650627849334881e-06, "loss": 0.4219, "step": 2732 }, { "epoch": 1.76, "learning_rate": 7.643848534193216e-06, "loss": 0.4174, "step": 2733 }, { "epoch": 1.77, "learning_rate": 7.63707036537234e-06, "loss": 0.423, "step": 2734 }, { "epoch": 1.77, "learning_rate": 7.63029334616999e-06, "loss": 0.4144, "step": 2735 }, { "epoch": 1.77, "learning_rate": 7.623517479883335e-06, "loss": 0.4094, "step": 2736 }, { "epoch": 1.77, "learning_rate": 7.616742769808991e-06, "loss": 0.4252, "step": 2737 }, { "epoch": 1.77, "learning_rate": 7.609969219243002e-06, "loss": 0.4277, "step": 2738 }, { "epoch": 1.77, "learning_rate": 7.603196831480859e-06, "loss": 0.4192, "step": 2739 }, { "epoch": 1.77, "learning_rate": 7.596425609817474e-06, "loss": 0.4358, "step": 2740 }, { "epoch": 1.77, "learning_rate": 7.589655557547206e-06, "loss": 0.443, "step": 2741 }, { "epoch": 1.77, "learning_rate": 7.5828866779638326e-06, "loss": 0.4373, "step": 2742 }, { "epoch": 1.77, "learning_rate": 7.5761189743605665e-06, "loss": 0.4126, "step": 2743 }, { "epoch": 1.77, "learning_rate": 7.569352450030054e-06, "loss": 0.4486, "step": 2744 }, { "epoch": 1.77, "learning_rate": 7.562587108264354e-06, "loss": 0.407, "step": 2745 }, { "epoch": 1.77, "learning_rate": 7.555822952354968e-06, "loss": 0.4225, "step": 2746 }, { "epoch": 1.77, "learning_rate": 7.549059985592797e-06, "loss": 0.4112, "step": 2747 }, { "epoch": 1.77, "learning_rate": 7.542298211268189e-06, "loss": 0.4332, "step": 2748 }, { "epoch": 1.77, "learning_rate": 7.535537632670891e-06, "loss": 0.4206, "step": 2749 }, { "epoch": 1.78, "learning_rate": 7.528778253090082e-06, "loss": 0.4285, "step": 2750 }, { "epoch": 1.78, "learning_rate": 7.522020075814348e-06, "loss": 0.4313, "step": 2751 }, { "epoch": 1.78, "learning_rate": 7.515263104131699e-06, "loss": 0.4224, "step": 2752 }, { "epoch": 1.78, "learning_rate": 7.50850734132955e-06, "loss": 0.4581, "step": 2753 }, { "epoch": 1.78, "learning_rate": 7.501752790694738e-06, "loss": 0.3965, "step": 2754 }, { "epoch": 1.78, "learning_rate": 7.494999455513496e-06, "loss": 0.3934, "step": 2755 }, { "epoch": 1.78, "learning_rate": 7.488247339071478e-06, "loss": 0.4173, "step": 2756 }, { "epoch": 1.78, "learning_rate": 7.481496444653746e-06, "loss": 0.4215, "step": 2757 }, { "epoch": 1.78, "learning_rate": 7.47474677554475e-06, "loss": 0.4282, "step": 2758 }, { "epoch": 1.78, "learning_rate": 7.46799833502837e-06, "loss": 0.3934, "step": 2759 }, { "epoch": 1.78, "learning_rate": 7.461251126387863e-06, "loss": 0.4222, "step": 2760 }, { "epoch": 1.78, "learning_rate": 7.454505152905904e-06, "loss": 0.4166, "step": 2761 }, { "epoch": 1.78, "learning_rate": 7.447760417864558e-06, "loss": 0.4333, "step": 2762 }, { "epoch": 1.78, "learning_rate": 7.4410169245452965e-06, "loss": 0.4425, "step": 2763 }, { "epoch": 1.78, "learning_rate": 7.434274676228973e-06, "loss": 0.4213, "step": 2764 }, { "epoch": 1.79, "learning_rate": 7.427533676195852e-06, "loss": 0.4305, "step": 2765 }, { "epoch": 1.79, "learning_rate": 7.420793927725572e-06, "loss": 0.4337, "step": 2766 }, { "epoch": 1.79, "learning_rate": 7.414055434097181e-06, "loss": 0.4088, "step": 2767 }, { "epoch": 1.79, "learning_rate": 7.407318198589102e-06, "loss": 0.4304, "step": 2768 }, { "epoch": 1.79, "learning_rate": 7.4005822244791545e-06, "loss": 0.4205, "step": 2769 }, { "epoch": 1.79, "learning_rate": 7.393847515044543e-06, "loss": 0.421, "step": 2770 }, { "epoch": 1.79, "learning_rate": 7.387114073561854e-06, "loss": 0.4186, "step": 2771 }, { "epoch": 1.79, "learning_rate": 7.380381903307061e-06, "loss": 0.4256, "step": 2772 }, { "epoch": 1.79, "learning_rate": 7.373651007555511e-06, "loss": 0.4266, "step": 2773 }, { "epoch": 1.79, "learning_rate": 7.366921389581945e-06, "loss": 0.4428, "step": 2774 }, { "epoch": 1.79, "learning_rate": 7.360193052660463e-06, "loss": 0.4313, "step": 2775 }, { "epoch": 1.79, "learning_rate": 7.353466000064563e-06, "loss": 0.4182, "step": 2776 }, { "epoch": 1.79, "learning_rate": 7.346740235067099e-06, "loss": 0.4113, "step": 2777 }, { "epoch": 1.79, "learning_rate": 7.340015760940313e-06, "loss": 0.4162, "step": 2778 }, { "epoch": 1.79, "learning_rate": 7.333292580955808e-06, "loss": 0.4154, "step": 2779 }, { "epoch": 1.79, "learning_rate": 7.326570698384569e-06, "loss": 0.4217, "step": 2780 }, { "epoch": 1.8, "learning_rate": 7.319850116496942e-06, "loss": 0.4345, "step": 2781 }, { "epoch": 1.8, "learning_rate": 7.313130838562638e-06, "loss": 0.4147, "step": 2782 }, { "epoch": 1.8, "learning_rate": 7.3064128678507415e-06, "loss": 0.425, "step": 2783 }, { "epoch": 1.8, "learning_rate": 7.299696207629692e-06, "loss": 0.4078, "step": 2784 }, { "epoch": 1.8, "learning_rate": 7.2929808611673e-06, "loss": 0.4215, "step": 2785 }, { "epoch": 1.8, "learning_rate": 7.286266831730728e-06, "loss": 0.4463, "step": 2786 }, { "epoch": 1.8, "learning_rate": 7.279554122586509e-06, "loss": 0.4131, "step": 2787 }, { "epoch": 1.8, "learning_rate": 7.2728427370005205e-06, "loss": 0.4381, "step": 2788 }, { "epoch": 1.8, "learning_rate": 7.266132678238013e-06, "loss": 0.4329, "step": 2789 }, { "epoch": 1.8, "learning_rate": 7.259423949563569e-06, "loss": 0.4226, "step": 2790 }, { "epoch": 1.8, "learning_rate": 7.252716554241146e-06, "loss": 0.4433, "step": 2791 }, { "epoch": 1.8, "learning_rate": 7.246010495534036e-06, "loss": 0.4521, "step": 2792 }, { "epoch": 1.8, "learning_rate": 7.23930577670489e-06, "loss": 0.4214, "step": 2793 }, { "epoch": 1.8, "learning_rate": 7.232602401015711e-06, "loss": 0.4069, "step": 2794 }, { "epoch": 1.8, "learning_rate": 7.225900371727832e-06, "loss": 0.421, "step": 2795 }, { "epoch": 1.81, "learning_rate": 7.2191996921019485e-06, "loss": 0.4346, "step": 2796 }, { "epoch": 1.81, "learning_rate": 7.212500365398087e-06, "loss": 0.4404, "step": 2797 }, { "epoch": 1.81, "learning_rate": 7.205802394875629e-06, "loss": 0.4435, "step": 2798 }, { "epoch": 1.81, "learning_rate": 7.199105783793279e-06, "loss": 0.4174, "step": 2799 }, { "epoch": 1.81, "learning_rate": 7.1924105354090955e-06, "loss": 0.4256, "step": 2800 }, { "epoch": 1.81, "learning_rate": 7.185716652980463e-06, "loss": 0.4328, "step": 2801 }, { "epoch": 1.81, "learning_rate": 7.17902413976411e-06, "loss": 0.4126, "step": 2802 }, { "epoch": 1.81, "learning_rate": 7.172332999016095e-06, "loss": 0.4172, "step": 2803 }, { "epoch": 1.81, "learning_rate": 7.165643233991806e-06, "loss": 0.4244, "step": 2804 }, { "epoch": 1.81, "learning_rate": 7.1589548479459645e-06, "loss": 0.3945, "step": 2805 }, { "epoch": 1.81, "learning_rate": 7.152267844132623e-06, "loss": 0.4235, "step": 2806 }, { "epoch": 1.81, "learning_rate": 7.1455822258051625e-06, "loss": 0.4358, "step": 2807 }, { "epoch": 1.81, "learning_rate": 7.138897996216278e-06, "loss": 0.4362, "step": 2808 }, { "epoch": 1.81, "learning_rate": 7.13221515861801e-06, "loss": 0.4166, "step": 2809 }, { "epoch": 1.81, "learning_rate": 7.1255337162616965e-06, "loss": 0.4275, "step": 2810 }, { "epoch": 1.81, "learning_rate": 7.118853672398023e-06, "loss": 0.4256, "step": 2811 }, { "epoch": 1.82, "learning_rate": 7.1121750302769685e-06, "loss": 0.4175, "step": 2812 }, { "epoch": 1.82, "learning_rate": 7.105497793147852e-06, "loss": 0.4209, "step": 2813 }, { "epoch": 1.82, "learning_rate": 7.098821964259295e-06, "loss": 0.4324, "step": 2814 }, { "epoch": 1.82, "learning_rate": 7.0921475468592435e-06, "loss": 0.4142, "step": 2815 }, { "epoch": 1.82, "learning_rate": 7.085474544194946e-06, "loss": 0.4385, "step": 2816 }, { "epoch": 1.82, "learning_rate": 7.078802959512976e-06, "loss": 0.4354, "step": 2817 }, { "epoch": 1.82, "learning_rate": 7.072132796059201e-06, "loss": 0.4204, "step": 2818 }, { "epoch": 1.82, "learning_rate": 7.065464057078812e-06, "loss": 0.4273, "step": 2819 }, { "epoch": 1.82, "learning_rate": 7.058796745816303e-06, "loss": 0.4058, "step": 2820 }, { "epoch": 1.82, "learning_rate": 7.052130865515463e-06, "loss": 0.4199, "step": 2821 }, { "epoch": 1.82, "learning_rate": 7.0454664194194e-06, "loss": 0.4337, "step": 2822 }, { "epoch": 1.82, "learning_rate": 7.038803410770512e-06, "loss": 0.4132, "step": 2823 }, { "epoch": 1.82, "learning_rate": 7.03214184281051e-06, "loss": 0.43, "step": 2824 }, { "epoch": 1.82, "learning_rate": 7.025481718780388e-06, "loss": 0.4246, "step": 2825 }, { "epoch": 1.82, "learning_rate": 7.018823041920456e-06, "loss": 0.4206, "step": 2826 }, { "epoch": 1.83, "learning_rate": 7.012165815470302e-06, "loss": 0.4487, "step": 2827 }, { "epoch": 1.83, "learning_rate": 7.0055100426688205e-06, "loss": 0.4262, "step": 2828 }, { "epoch": 1.83, "learning_rate": 6.998855726754195e-06, "loss": 0.4044, "step": 2829 }, { "epoch": 1.83, "learning_rate": 6.992202870963899e-06, "loss": 0.422, "step": 2830 }, { "epoch": 1.83, "learning_rate": 6.985551478534699e-06, "loss": 0.415, "step": 2831 }, { "epoch": 1.83, "learning_rate": 6.978901552702643e-06, "loss": 0.4085, "step": 2832 }, { "epoch": 1.83, "learning_rate": 6.972253096703079e-06, "loss": 0.4532, "step": 2833 }, { "epoch": 1.83, "learning_rate": 6.965606113770617e-06, "loss": 0.4523, "step": 2834 }, { "epoch": 1.83, "learning_rate": 6.958960607139178e-06, "loss": 0.424, "step": 2835 }, { "epoch": 1.83, "learning_rate": 6.95231658004194e-06, "loss": 0.4066, "step": 2836 }, { "epoch": 1.83, "learning_rate": 6.945674035711381e-06, "loss": 0.4383, "step": 2837 }, { "epoch": 1.83, "learning_rate": 6.939032977379237e-06, "loss": 0.4082, "step": 2838 }, { "epoch": 1.83, "learning_rate": 6.932393408276543e-06, "loss": 0.4417, "step": 2839 }, { "epoch": 1.83, "learning_rate": 6.925755331633592e-06, "loss": 0.4349, "step": 2840 }, { "epoch": 1.83, "learning_rate": 6.919118750679966e-06, "loss": 0.4319, "step": 2841 }, { "epoch": 1.83, "learning_rate": 6.912483668644503e-06, "loss": 0.4162, "step": 2842 }, { "epoch": 1.84, "learning_rate": 6.905850088755322e-06, "loss": 0.4311, "step": 2843 }, { "epoch": 1.84, "learning_rate": 6.899218014239815e-06, "loss": 0.4281, "step": 2844 }, { "epoch": 1.84, "learning_rate": 6.892587448324627e-06, "loss": 0.4132, "step": 2845 }, { "epoch": 1.84, "learning_rate": 6.8859583942356855e-06, "loss": 0.4385, "step": 2846 }, { "epoch": 1.84, "learning_rate": 6.879330855198168e-06, "loss": 0.4258, "step": 2847 }, { "epoch": 1.84, "learning_rate": 6.872704834436526e-06, "loss": 0.4145, "step": 2848 }, { "epoch": 1.84, "learning_rate": 6.866080335174466e-06, "loss": 0.4413, "step": 2849 }, { "epoch": 1.84, "learning_rate": 6.8594573606349575e-06, "loss": 0.385, "step": 2850 }, { "epoch": 1.84, "learning_rate": 6.852835914040226e-06, "loss": 0.4184, "step": 2851 }, { "epoch": 1.84, "learning_rate": 6.846215998611757e-06, "loss": 0.3976, "step": 2852 }, { "epoch": 1.84, "learning_rate": 6.839597617570282e-06, "loss": 0.4312, "step": 2853 }, { "epoch": 1.84, "learning_rate": 6.8329807741358e-06, "loss": 0.4498, "step": 2854 }, { "epoch": 1.84, "learning_rate": 6.826365471527551e-06, "loss": 0.3972, "step": 2855 }, { "epoch": 1.84, "learning_rate": 6.8197517129640265e-06, "loss": 0.4283, "step": 2856 }, { "epoch": 1.84, "learning_rate": 6.8131395016629745e-06, "loss": 0.4455, "step": 2857 }, { "epoch": 1.85, "learning_rate": 6.80652884084138e-06, "loss": 0.4272, "step": 2858 }, { "epoch": 1.85, "learning_rate": 6.799919733715485e-06, "loss": 0.4309, "step": 2859 }, { "epoch": 1.85, "learning_rate": 6.79331218350076e-06, "loss": 0.418, "step": 2860 }, { "epoch": 1.85, "learning_rate": 6.786706193411935e-06, "loss": 0.4181, "step": 2861 }, { "epoch": 1.85, "learning_rate": 6.780101766662966e-06, "loss": 0.4211, "step": 2862 }, { "epoch": 1.85, "learning_rate": 6.773498906467062e-06, "loss": 0.4138, "step": 2863 }, { "epoch": 1.85, "learning_rate": 6.766897616036661e-06, "loss": 0.4146, "step": 2864 }, { "epoch": 1.85, "learning_rate": 6.760297898583439e-06, "loss": 0.4432, "step": 2865 }, { "epoch": 1.85, "learning_rate": 6.753699757318304e-06, "loss": 0.4237, "step": 2866 }, { "epoch": 1.85, "learning_rate": 6.74710319545141e-06, "loss": 0.4106, "step": 2867 }, { "epoch": 1.85, "learning_rate": 6.740508216192121e-06, "loss": 0.4233, "step": 2868 }, { "epoch": 1.85, "learning_rate": 6.7339148227490505e-06, "loss": 0.4389, "step": 2869 }, { "epoch": 1.85, "learning_rate": 6.727323018330034e-06, "loss": 0.4186, "step": 2870 }, { "epoch": 1.85, "learning_rate": 6.7207328061421276e-06, "loss": 0.4115, "step": 2871 }, { "epoch": 1.85, "learning_rate": 6.714144189391625e-06, "loss": 0.4329, "step": 2872 }, { "epoch": 1.85, "learning_rate": 6.7075571712840285e-06, "loss": 0.4236, "step": 2873 }, { "epoch": 1.86, "learning_rate": 6.700971755024077e-06, "loss": 0.4252, "step": 2874 }, { "epoch": 1.86, "learning_rate": 6.694387943815719e-06, "loss": 0.4296, "step": 2875 }, { "epoch": 1.86, "learning_rate": 6.6878057408621345e-06, "loss": 0.414, "step": 2876 }, { "epoch": 1.86, "learning_rate": 6.681225149365704e-06, "loss": 0.4205, "step": 2877 }, { "epoch": 1.86, "learning_rate": 6.674646172528045e-06, "loss": 0.4033, "step": 2878 }, { "epoch": 1.86, "learning_rate": 6.668068813549966e-06, "loss": 0.42, "step": 2879 }, { "epoch": 1.86, "learning_rate": 6.661493075631506e-06, "loss": 0.4132, "step": 2880 }, { "epoch": 1.86, "learning_rate": 6.6549189619719145e-06, "loss": 0.4145, "step": 2881 }, { "epoch": 1.86, "learning_rate": 6.648346475769637e-06, "loss": 0.4044, "step": 2882 }, { "epoch": 1.86, "learning_rate": 6.641775620222342e-06, "loss": 0.436, "step": 2883 }, { "epoch": 1.86, "learning_rate": 6.635206398526895e-06, "loss": 0.4154, "step": 2884 }, { "epoch": 1.86, "learning_rate": 6.628638813879378e-06, "loss": 0.4402, "step": 2885 }, { "epoch": 1.86, "learning_rate": 6.622072869475058e-06, "loss": 0.4217, "step": 2886 }, { "epoch": 1.86, "learning_rate": 6.615508568508425e-06, "loss": 0.4281, "step": 2887 }, { "epoch": 1.86, "learning_rate": 6.60894591417315e-06, "loss": 0.4395, "step": 2888 }, { "epoch": 1.87, "learning_rate": 6.6023849096621205e-06, "loss": 0.4213, "step": 2889 }, { "epoch": 1.87, "learning_rate": 6.59582555816741e-06, "loss": 0.4099, "step": 2890 }, { "epoch": 1.87, "learning_rate": 6.589267862880289e-06, "loss": 0.4125, "step": 2891 }, { "epoch": 1.87, "learning_rate": 6.582711826991226e-06, "loss": 0.4097, "step": 2892 }, { "epoch": 1.87, "learning_rate": 6.576157453689877e-06, "loss": 0.4336, "step": 2893 }, { "epoch": 1.87, "learning_rate": 6.569604746165101e-06, "loss": 0.4052, "step": 2894 }, { "epoch": 1.87, "learning_rate": 6.563053707604927e-06, "loss": 0.4379, "step": 2895 }, { "epoch": 1.87, "learning_rate": 6.556504341196592e-06, "loss": 0.4071, "step": 2896 }, { "epoch": 1.87, "learning_rate": 6.5499566501265036e-06, "loss": 0.4129, "step": 2897 }, { "epoch": 1.87, "learning_rate": 6.543410637580265e-06, "loss": 0.4306, "step": 2898 }, { "epoch": 1.87, "learning_rate": 6.536866306742658e-06, "loss": 0.4199, "step": 2899 }, { "epoch": 1.87, "learning_rate": 6.5303236607976465e-06, "loss": 0.4397, "step": 2900 }, { "epoch": 1.87, "learning_rate": 6.523782702928372e-06, "loss": 0.4158, "step": 2901 }, { "epoch": 1.87, "learning_rate": 6.517243436317167e-06, "loss": 0.4239, "step": 2902 }, { "epoch": 1.87, "learning_rate": 6.510705864145521e-06, "loss": 0.4208, "step": 2903 }, { "epoch": 1.87, "learning_rate": 6.504169989594121e-06, "loss": 0.4561, "step": 2904 }, { "epoch": 1.88, "learning_rate": 6.497635815842809e-06, "loss": 0.396, "step": 2905 }, { "epoch": 1.88, "learning_rate": 6.491103346070608e-06, "loss": 0.4241, "step": 2906 }, { "epoch": 1.88, "learning_rate": 6.48457258345572e-06, "loss": 0.4542, "step": 2907 }, { "epoch": 1.88, "learning_rate": 6.4780435311754986e-06, "loss": 0.4458, "step": 2908 }, { "epoch": 1.88, "learning_rate": 6.471516192406481e-06, "loss": 0.4245, "step": 2909 }, { "epoch": 1.88, "learning_rate": 6.464990570324361e-06, "loss": 0.4507, "step": 2910 }, { "epoch": 1.88, "learning_rate": 6.458466668104005e-06, "loss": 0.4083, "step": 2911 }, { "epoch": 1.88, "learning_rate": 6.451944488919433e-06, "loss": 0.4253, "step": 2912 }, { "epoch": 1.88, "learning_rate": 6.445424035943839e-06, "loss": 0.414, "step": 2913 }, { "epoch": 1.88, "learning_rate": 6.438905312349563e-06, "loss": 0.4186, "step": 2914 }, { "epoch": 1.88, "learning_rate": 6.432388321308115e-06, "loss": 0.4212, "step": 2915 }, { "epoch": 1.88, "learning_rate": 6.425873065990158e-06, "loss": 0.4101, "step": 2916 }, { "epoch": 1.88, "learning_rate": 6.41935954956551e-06, "loss": 0.4295, "step": 2917 }, { "epoch": 1.88, "learning_rate": 6.412847775203141e-06, "loss": 0.4204, "step": 2918 }, { "epoch": 1.88, "learning_rate": 6.406337746071181e-06, "loss": 0.4329, "step": 2919 }, { "epoch": 1.89, "learning_rate": 6.3998294653369046e-06, "loss": 0.432, "step": 2920 }, { "epoch": 1.89, "learning_rate": 6.393322936166733e-06, "loss": 0.4352, "step": 2921 }, { "epoch": 1.89, "learning_rate": 6.386818161726246e-06, "loss": 0.4151, "step": 2922 }, { "epoch": 1.89, "learning_rate": 6.3803151451801545e-06, "loss": 0.4429, "step": 2923 }, { "epoch": 1.89, "learning_rate": 6.373813889692331e-06, "loss": 0.4194, "step": 2924 }, { "epoch": 1.89, "learning_rate": 6.367314398425777e-06, "loss": 0.425, "step": 2925 }, { "epoch": 1.89, "learning_rate": 6.360816674542644e-06, "loss": 0.3988, "step": 2926 }, { "epoch": 1.89, "learning_rate": 6.354320721204219e-06, "loss": 0.4187, "step": 2927 }, { "epoch": 1.89, "learning_rate": 6.347826541570936e-06, "loss": 0.4019, "step": 2928 }, { "epoch": 1.89, "learning_rate": 6.341334138802351e-06, "loss": 0.4365, "step": 2929 }, { "epoch": 1.89, "learning_rate": 6.334843516057168e-06, "loss": 0.4348, "step": 2930 }, { "epoch": 1.89, "learning_rate": 6.3283546764932245e-06, "loss": 0.4365, "step": 2931 }, { "epoch": 1.89, "learning_rate": 6.3218676232674815e-06, "loss": 0.4197, "step": 2932 }, { "epoch": 1.89, "learning_rate": 6.315382359536042e-06, "loss": 0.4299, "step": 2933 }, { "epoch": 1.89, "learning_rate": 6.30889888845413e-06, "loss": 0.4163, "step": 2934 }, { "epoch": 1.89, "learning_rate": 6.302417213176101e-06, "loss": 0.4447, "step": 2935 }, { "epoch": 1.9, "learning_rate": 6.295937336855433e-06, "loss": 0.423, "step": 2936 }, { "epoch": 1.9, "learning_rate": 6.289459262644739e-06, "loss": 0.4379, "step": 2937 }, { "epoch": 1.9, "learning_rate": 6.282982993695741e-06, "loss": 0.433, "step": 2938 }, { "epoch": 1.9, "learning_rate": 6.276508533159296e-06, "loss": 0.4228, "step": 2939 }, { "epoch": 1.9, "learning_rate": 6.270035884185367e-06, "loss": 0.444, "step": 2940 }, { "epoch": 1.9, "learning_rate": 6.26356504992305e-06, "loss": 0.4349, "step": 2941 }, { "epoch": 1.9, "learning_rate": 6.257096033520551e-06, "loss": 0.4632, "step": 2942 }, { "epoch": 1.9, "learning_rate": 6.25062883812519e-06, "loss": 0.4188, "step": 2943 }, { "epoch": 1.9, "learning_rate": 6.244163466883405e-06, "loss": 0.4342, "step": 2944 }, { "epoch": 1.9, "learning_rate": 6.2376999229407435e-06, "loss": 0.4039, "step": 2945 }, { "epoch": 1.9, "learning_rate": 6.231238209441869e-06, "loss": 0.4381, "step": 2946 }, { "epoch": 1.9, "learning_rate": 6.224778329530544e-06, "loss": 0.4164, "step": 2947 }, { "epoch": 1.9, "learning_rate": 6.218320286349655e-06, "loss": 0.4223, "step": 2948 }, { "epoch": 1.9, "learning_rate": 6.211864083041174e-06, "loss": 0.4308, "step": 2949 }, { "epoch": 1.9, "learning_rate": 6.2054097227462e-06, "loss": 0.4417, "step": 2950 }, { "epoch": 1.91, "learning_rate": 6.198957208604919e-06, "loss": 0.4128, "step": 2951 }, { "epoch": 1.91, "learning_rate": 6.192506543756626e-06, "loss": 0.4269, "step": 2952 }, { "epoch": 1.91, "learning_rate": 6.186057731339713e-06, "loss": 0.4386, "step": 2953 }, { "epoch": 1.91, "learning_rate": 6.179610774491678e-06, "loss": 0.4051, "step": 2954 }, { "epoch": 1.91, "learning_rate": 6.173165676349103e-06, "loss": 0.4036, "step": 2955 }, { "epoch": 1.91, "learning_rate": 6.1667224400476785e-06, "loss": 0.4331, "step": 2956 }, { "epoch": 1.91, "learning_rate": 6.160281068722187e-06, "loss": 0.447, "step": 2957 }, { "epoch": 1.91, "learning_rate": 6.153841565506493e-06, "loss": 0.4119, "step": 2958 }, { "epoch": 1.91, "learning_rate": 6.147403933533567e-06, "loss": 0.4448, "step": 2959 }, { "epoch": 1.91, "learning_rate": 6.140968175935458e-06, "loss": 0.4161, "step": 2960 }, { "epoch": 1.91, "learning_rate": 6.134534295843308e-06, "loss": 0.4256, "step": 2961 }, { "epoch": 1.91, "learning_rate": 6.128102296387344e-06, "loss": 0.4282, "step": 2962 }, { "epoch": 1.91, "learning_rate": 6.121672180696884e-06, "loss": 0.4297, "step": 2963 }, { "epoch": 1.91, "learning_rate": 6.115243951900316e-06, "loss": 0.4077, "step": 2964 }, { "epoch": 1.91, "learning_rate": 6.108817613125124e-06, "loss": 0.4218, "step": 2965 }, { "epoch": 1.91, "learning_rate": 6.102393167497862e-06, "loss": 0.4102, "step": 2966 }, { "epoch": 1.92, "learning_rate": 6.095970618144173e-06, "loss": 0.4148, "step": 2967 }, { "epoch": 1.92, "learning_rate": 6.089549968188767e-06, "loss": 0.4201, "step": 2968 }, { "epoch": 1.92, "learning_rate": 6.0831312207554395e-06, "loss": 0.417, "step": 2969 }, { "epoch": 1.92, "learning_rate": 6.0767143789670525e-06, "loss": 0.4168, "step": 2970 }, { "epoch": 1.92, "learning_rate": 6.070299445945544e-06, "loss": 0.4055, "step": 2971 }, { "epoch": 1.92, "learning_rate": 6.063886424811929e-06, "loss": 0.4113, "step": 2972 }, { "epoch": 1.92, "learning_rate": 6.057475318686278e-06, "loss": 0.4145, "step": 2973 }, { "epoch": 1.92, "learning_rate": 6.051066130687747e-06, "loss": 0.4085, "step": 2974 }, { "epoch": 1.92, "learning_rate": 6.044658863934543e-06, "loss": 0.4227, "step": 2975 }, { "epoch": 1.92, "learning_rate": 6.038253521543951e-06, "loss": 0.4043, "step": 2976 }, { "epoch": 1.92, "learning_rate": 6.031850106632313e-06, "loss": 0.424, "step": 2977 }, { "epoch": 1.92, "learning_rate": 6.025448622315033e-06, "loss": 0.4231, "step": 2978 }, { "epoch": 1.92, "learning_rate": 6.01904907170658e-06, "loss": 0.4113, "step": 2979 }, { "epoch": 1.92, "learning_rate": 6.01265145792048e-06, "loss": 0.4168, "step": 2980 }, { "epoch": 1.92, "learning_rate": 6.00625578406931e-06, "loss": 0.4029, "step": 2981 }, { "epoch": 1.93, "learning_rate": 5.999862053264716e-06, "loss": 0.4397, "step": 2982 }, { "epoch": 1.93, "learning_rate": 5.993470268617393e-06, "loss": 0.4384, "step": 2983 }, { "epoch": 1.93, "learning_rate": 5.987080433237082e-06, "loss": 0.4123, "step": 2984 }, { "epoch": 1.93, "learning_rate": 5.980692550232587e-06, "loss": 0.4275, "step": 2985 }, { "epoch": 1.93, "learning_rate": 5.974306622711758e-06, "loss": 0.4284, "step": 2986 }, { "epoch": 1.93, "learning_rate": 5.967922653781489e-06, "loss": 0.413, "step": 2987 }, { "epoch": 1.93, "learning_rate": 5.961540646547722e-06, "loss": 0.4179, "step": 2988 }, { "epoch": 1.93, "learning_rate": 5.955160604115457e-06, "loss": 0.4338, "step": 2989 }, { "epoch": 1.93, "learning_rate": 5.948782529588719e-06, "loss": 0.4171, "step": 2990 }, { "epoch": 1.93, "learning_rate": 5.942406426070593e-06, "loss": 0.4258, "step": 2991 }, { "epoch": 1.93, "learning_rate": 5.936032296663188e-06, "loss": 0.413, "step": 2992 }, { "epoch": 1.93, "learning_rate": 5.929660144467666e-06, "loss": 0.4218, "step": 2993 }, { "epoch": 1.93, "learning_rate": 5.9232899725842276e-06, "loss": 0.4325, "step": 2994 }, { "epoch": 1.93, "learning_rate": 5.916921784112098e-06, "loss": 0.4225, "step": 2995 }, { "epoch": 1.93, "learning_rate": 5.9105555821495486e-06, "loss": 0.4119, "step": 2996 }, { "epoch": 1.93, "learning_rate": 5.904191369793877e-06, "loss": 0.4224, "step": 2997 }, { "epoch": 1.94, "learning_rate": 5.8978291501414195e-06, "loss": 0.435, "step": 2998 }, { "epoch": 1.94, "learning_rate": 5.891468926287535e-06, "loss": 0.4124, "step": 2999 }, { "epoch": 1.94, "learning_rate": 5.885110701326621e-06, "loss": 0.4449, "step": 3000 }, { "epoch": 1.94, "learning_rate": 5.878754478352091e-06, "loss": 0.4176, "step": 3001 }, { "epoch": 1.94, "learning_rate": 5.872400260456397e-06, "loss": 0.4019, "step": 3002 }, { "epoch": 1.94, "learning_rate": 5.866048050731003e-06, "loss": 0.4324, "step": 3003 }, { "epoch": 1.94, "learning_rate": 5.859697852266409e-06, "loss": 0.4087, "step": 3004 }, { "epoch": 1.94, "learning_rate": 5.853349668152124e-06, "loss": 0.4213, "step": 3005 }, { "epoch": 1.94, "learning_rate": 5.847003501476682e-06, "loss": 0.4168, "step": 3006 }, { "epoch": 1.94, "learning_rate": 5.840659355327646e-06, "loss": 0.4227, "step": 3007 }, { "epoch": 1.94, "learning_rate": 5.834317232791575e-06, "loss": 0.4324, "step": 3008 }, { "epoch": 1.94, "learning_rate": 5.82797713695406e-06, "loss": 0.4138, "step": 3009 }, { "epoch": 1.94, "learning_rate": 5.8216390708996955e-06, "loss": 0.411, "step": 3010 }, { "epoch": 1.94, "learning_rate": 5.815303037712103e-06, "loss": 0.4195, "step": 3011 }, { "epoch": 1.94, "learning_rate": 5.808969040473893e-06, "loss": 0.4047, "step": 3012 }, { "epoch": 1.95, "learning_rate": 5.8026370822667135e-06, "loss": 0.4537, "step": 3013 }, { "epoch": 1.95, "learning_rate": 5.796307166171185e-06, "loss": 0.4446, "step": 3014 }, { "epoch": 1.95, "learning_rate": 5.789979295266972e-06, "loss": 0.4163, "step": 3015 }, { "epoch": 1.95, "learning_rate": 5.7836534726327175e-06, "loss": 0.4425, "step": 3016 }, { "epoch": 1.95, "learning_rate": 5.77732970134608e-06, "loss": 0.4248, "step": 3017 }, { "epoch": 1.95, "learning_rate": 5.771007984483716e-06, "loss": 0.41, "step": 3018 }, { "epoch": 1.95, "learning_rate": 5.764688325121277e-06, "loss": 0.4213, "step": 3019 }, { "epoch": 1.95, "learning_rate": 5.758370726333434e-06, "loss": 0.4241, "step": 3020 }, { "epoch": 1.95, "learning_rate": 5.752055191193826e-06, "loss": 0.4123, "step": 3021 }, { "epoch": 1.95, "learning_rate": 5.745741722775117e-06, "loss": 0.4326, "step": 3022 }, { "epoch": 1.95, "learning_rate": 5.739430324148937e-06, "loss": 0.4223, "step": 3023 }, { "epoch": 1.95, "learning_rate": 5.733120998385935e-06, "loss": 0.4372, "step": 3024 }, { "epoch": 1.95, "learning_rate": 5.726813748555736e-06, "loss": 0.4294, "step": 3025 }, { "epoch": 1.95, "learning_rate": 5.72050857772696e-06, "loss": 0.4215, "step": 3026 }, { "epoch": 1.95, "learning_rate": 5.714205488967215e-06, "loss": 0.4484, "step": 3027 }, { "epoch": 1.95, "learning_rate": 5.707904485343094e-06, "loss": 0.4055, "step": 3028 }, { "epoch": 1.96, "learning_rate": 5.701605569920179e-06, "loss": 0.4059, "step": 3029 }, { "epoch": 1.96, "learning_rate": 5.695308745763034e-06, "loss": 0.4193, "step": 3030 }, { "epoch": 1.96, "learning_rate": 5.6890140159352055e-06, "loss": 0.4276, "step": 3031 }, { "epoch": 1.96, "learning_rate": 5.682721383499217e-06, "loss": 0.4227, "step": 3032 }, { "epoch": 1.96, "learning_rate": 5.676430851516586e-06, "loss": 0.4097, "step": 3033 }, { "epoch": 1.96, "learning_rate": 5.670142423047791e-06, "loss": 0.4216, "step": 3034 }, { "epoch": 1.96, "learning_rate": 5.6638561011522965e-06, "loss": 0.4293, "step": 3035 }, { "epoch": 1.96, "learning_rate": 5.657571888888538e-06, "loss": 0.4206, "step": 3036 }, { "epoch": 1.96, "learning_rate": 5.651289789313929e-06, "loss": 0.4346, "step": 3037 }, { "epoch": 1.96, "learning_rate": 5.64500980548485e-06, "loss": 0.4263, "step": 3038 }, { "epoch": 1.96, "learning_rate": 5.638731940456657e-06, "loss": 0.404, "step": 3039 }, { "epoch": 1.96, "learning_rate": 5.63245619728367e-06, "loss": 0.4194, "step": 3040 }, { "epoch": 1.96, "learning_rate": 5.626182579019182e-06, "loss": 0.4179, "step": 3041 }, { "epoch": 1.96, "learning_rate": 5.619911088715444e-06, "loss": 0.4098, "step": 3042 }, { "epoch": 1.96, "learning_rate": 5.613641729423687e-06, "loss": 0.446, "step": 3043 }, { "epoch": 1.97, "learning_rate": 5.60737450419409e-06, "loss": 0.4242, "step": 3044 }, { "epoch": 1.97, "learning_rate": 5.601109416075799e-06, "loss": 0.4169, "step": 3045 }, { "epoch": 1.97, "learning_rate": 5.594846468116923e-06, "loss": 0.4219, "step": 3046 }, { "epoch": 1.97, "learning_rate": 5.588585663364526e-06, "loss": 0.42, "step": 3047 }, { "epoch": 1.97, "learning_rate": 5.582327004864631e-06, "loss": 0.4072, "step": 3048 }, { "epoch": 1.97, "learning_rate": 5.576070495662215e-06, "loss": 0.4185, "step": 3049 }, { "epoch": 1.97, "learning_rate": 5.569816138801211e-06, "loss": 0.3945, "step": 3050 }, { "epoch": 1.97, "learning_rate": 5.563563937324503e-06, "loss": 0.4391, "step": 3051 }, { "epoch": 1.97, "learning_rate": 5.557313894273937e-06, "loss": 0.4448, "step": 3052 }, { "epoch": 1.97, "learning_rate": 5.551066012690286e-06, "loss": 0.4188, "step": 3053 }, { "epoch": 1.97, "learning_rate": 5.5448202956133004e-06, "loss": 0.4461, "step": 3054 }, { "epoch": 1.97, "learning_rate": 5.5385767460816454e-06, "loss": 0.422, "step": 3055 }, { "epoch": 1.97, "learning_rate": 5.532335367132962e-06, "loss": 0.4147, "step": 3056 }, { "epoch": 1.97, "learning_rate": 5.526096161803816e-06, "loss": 0.4062, "step": 3057 }, { "epoch": 1.97, "learning_rate": 5.519859133129723e-06, "loss": 0.4393, "step": 3058 }, { "epoch": 1.97, "learning_rate": 5.513624284145138e-06, "loss": 0.4077, "step": 3059 }, { "epoch": 1.98, "learning_rate": 5.507391617883454e-06, "loss": 0.4261, "step": 3060 }, { "epoch": 1.98, "learning_rate": 5.501161137377005e-06, "loss": 0.4122, "step": 3061 }, { "epoch": 1.98, "learning_rate": 5.494932845657057e-06, "loss": 0.433, "step": 3062 }, { "epoch": 1.98, "learning_rate": 5.4887067457538245e-06, "loss": 0.4312, "step": 3063 }, { "epoch": 1.98, "learning_rate": 5.4824828406964305e-06, "loss": 0.4189, "step": 3064 }, { "epoch": 1.98, "learning_rate": 5.476261133512959e-06, "loss": 0.4231, "step": 3065 }, { "epoch": 1.98, "learning_rate": 5.470041627230398e-06, "loss": 0.4209, "step": 3066 }, { "epoch": 1.98, "learning_rate": 5.463824324874687e-06, "loss": 0.4312, "step": 3067 }, { "epoch": 1.98, "learning_rate": 5.457609229470681e-06, "loss": 0.418, "step": 3068 }, { "epoch": 1.98, "learning_rate": 5.451396344042165e-06, "loss": 0.4376, "step": 3069 }, { "epoch": 1.98, "learning_rate": 5.445185671611846e-06, "loss": 0.425, "step": 3070 }, { "epoch": 1.98, "learning_rate": 5.438977215201351e-06, "loss": 0.3752, "step": 3071 }, { "epoch": 1.98, "learning_rate": 5.4327709778312484e-06, "loss": 0.4104, "step": 3072 }, { "epoch": 1.98, "learning_rate": 5.426566962520996e-06, "loss": 0.4221, "step": 3073 }, { "epoch": 1.98, "learning_rate": 5.420365172289003e-06, "loss": 0.4092, "step": 3074 }, { "epoch": 1.99, "learning_rate": 5.414165610152563e-06, "loss": 0.4061, "step": 3075 }, { "epoch": 1.99, "learning_rate": 5.407968279127915e-06, "loss": 0.4241, "step": 3076 }, { "epoch": 1.99, "learning_rate": 5.401773182230199e-06, "loss": 0.4308, "step": 3077 }, { "epoch": 1.99, "learning_rate": 5.395580322473466e-06, "loss": 0.4365, "step": 3078 }, { "epoch": 1.99, "learning_rate": 5.3893897028706835e-06, "loss": 0.4003, "step": 3079 }, { "epoch": 1.99, "learning_rate": 5.383201326433727e-06, "loss": 0.4163, "step": 3080 }, { "epoch": 1.99, "learning_rate": 5.37701519617338e-06, "loss": 0.4138, "step": 3081 }, { "epoch": 1.99, "learning_rate": 5.370831315099332e-06, "loss": 0.4197, "step": 3082 }, { "epoch": 1.99, "learning_rate": 5.364649686220192e-06, "loss": 0.4181, "step": 3083 }, { "epoch": 1.99, "learning_rate": 5.358470312543445e-06, "loss": 0.4267, "step": 3084 }, { "epoch": 1.99, "learning_rate": 5.352293197075507e-06, "loss": 0.4488, "step": 3085 }, { "epoch": 1.99, "learning_rate": 5.346118342821683e-06, "loss": 0.4031, "step": 3086 }, { "epoch": 1.99, "learning_rate": 5.339945752786177e-06, "loss": 0.4372, "step": 3087 }, { "epoch": 1.99, "learning_rate": 5.3337754299720925e-06, "loss": 0.4367, "step": 3088 }, { "epoch": 1.99, "learning_rate": 5.32760737738143e-06, "loss": 0.4285, "step": 3089 }, { "epoch": 1.99, "learning_rate": 5.321441598015089e-06, "loss": 0.4383, "step": 3090 }, { "epoch": 2.0, "learning_rate": 5.315278094872857e-06, "loss": 0.4406, "step": 3091 }, { "epoch": 2.0, "learning_rate": 5.30911687095342e-06, "loss": 0.426, "step": 3092 }, { "epoch": 2.0, "learning_rate": 5.302957929254346e-06, "loss": 0.4241, "step": 3093 }, { "epoch": 2.0, "learning_rate": 5.296801272772109e-06, "loss": 0.4375, "step": 3094 }, { "epoch": 2.0, "learning_rate": 5.290646904502055e-06, "loss": 0.4278, "step": 3095 }, { "epoch": 2.0, "learning_rate": 5.284494827438423e-06, "loss": 0.4194, "step": 3096 }, { "epoch": 2.0, "learning_rate": 5.27834504457434e-06, "loss": 0.4136, "step": 3097 }, { "epoch": 2.0, "learning_rate": 5.2721975589018105e-06, "loss": 0.3896, "step": 3098 }, { "epoch": 2.0, "learning_rate": 5.266052373411726e-06, "loss": 0.3382, "step": 3099 }, { "epoch": 2.0, "learning_rate": 5.25990949109386e-06, "loss": 0.3367, "step": 3100 }, { "epoch": 2.0, "learning_rate": 5.253768914936858e-06, "loss": 0.3222, "step": 3101 }, { "epoch": 2.0, "learning_rate": 5.247630647928254e-06, "loss": 0.322, "step": 3102 }, { "epoch": 2.0, "learning_rate": 5.241494693054447e-06, "loss": 0.312, "step": 3103 }, { "epoch": 2.0, "learning_rate": 5.2353610533007305e-06, "loss": 0.3246, "step": 3104 }, { "epoch": 2.0, "learning_rate": 5.229229731651241e-06, "loss": 0.3385, "step": 3105 }, { "epoch": 2.01, "learning_rate": 5.223100731089017e-06, "loss": 0.3109, "step": 3106 }, { "epoch": 2.01, "learning_rate": 5.2169740545959515e-06, "loss": 0.3103, "step": 3107 }, { "epoch": 2.01, "learning_rate": 5.210849705152809e-06, "loss": 0.3002, "step": 3108 }, { "epoch": 2.01, "learning_rate": 5.204727685739228e-06, "loss": 0.3072, "step": 3109 }, { "epoch": 2.01, "learning_rate": 5.198607999333706e-06, "loss": 0.3324, "step": 3110 }, { "epoch": 2.01, "learning_rate": 5.192490648913607e-06, "loss": 0.2982, "step": 3111 }, { "epoch": 2.01, "learning_rate": 5.186375637455159e-06, "loss": 0.3182, "step": 3112 }, { "epoch": 2.01, "learning_rate": 5.180262967933463e-06, "loss": 0.3097, "step": 3113 }, { "epoch": 2.01, "learning_rate": 5.174152643322454e-06, "loss": 0.298, "step": 3114 }, { "epoch": 2.01, "learning_rate": 5.1680446665949605e-06, "loss": 0.3114, "step": 3115 }, { "epoch": 2.01, "learning_rate": 5.161939040722634e-06, "loss": 0.3197, "step": 3116 }, { "epoch": 2.01, "learning_rate": 5.1558357686760144e-06, "loss": 0.3264, "step": 3117 }, { "epoch": 2.01, "learning_rate": 5.149734853424466e-06, "loss": 0.2973, "step": 3118 }, { "epoch": 2.01, "learning_rate": 5.143636297936233e-06, "loss": 0.3101, "step": 3119 }, { "epoch": 2.01, "learning_rate": 5.137540105178396e-06, "loss": 0.2996, "step": 3120 }, { "epoch": 2.02, "learning_rate": 5.131446278116887e-06, "loss": 0.3088, "step": 3121 }, { "epoch": 2.02, "learning_rate": 5.1253548197165016e-06, "loss": 0.295, "step": 3122 }, { "epoch": 2.02, "learning_rate": 5.119265732940857e-06, "loss": 0.3032, "step": 3123 }, { "epoch": 2.02, "learning_rate": 5.113179020752443e-06, "loss": 0.3196, "step": 3124 }, { "epoch": 2.02, "learning_rate": 5.1070946861125726e-06, "loss": 0.3117, "step": 3125 }, { "epoch": 2.02, "learning_rate": 5.101012731981422e-06, "loss": 0.3333, "step": 3126 }, { "epoch": 2.02, "learning_rate": 5.094933161317985e-06, "loss": 0.3062, "step": 3127 }, { "epoch": 2.02, "learning_rate": 5.088855977080123e-06, "loss": 0.3062, "step": 3128 }, { "epoch": 2.02, "learning_rate": 5.082781182224518e-06, "loss": 0.3217, "step": 3129 }, { "epoch": 2.02, "learning_rate": 5.076708779706695e-06, "loss": 0.3026, "step": 3130 }, { "epoch": 2.02, "learning_rate": 5.070638772481014e-06, "loss": 0.3168, "step": 3131 }, { "epoch": 2.02, "learning_rate": 5.064571163500667e-06, "loss": 0.3202, "step": 3132 }, { "epoch": 2.02, "learning_rate": 5.058505955717694e-06, "loss": 0.3001, "step": 3133 }, { "epoch": 2.02, "learning_rate": 5.05244315208294e-06, "loss": 0.2991, "step": 3134 }, { "epoch": 2.02, "learning_rate": 5.04638275554611e-06, "loss": 0.3181, "step": 3135 }, { "epoch": 2.02, "learning_rate": 5.040324769055709e-06, "loss": 0.3096, "step": 3136 }, { "epoch": 2.03, "learning_rate": 5.034269195559096e-06, "loss": 0.3149, "step": 3137 }, { "epoch": 2.03, "learning_rate": 5.028216038002441e-06, "loss": 0.2996, "step": 3138 }, { "epoch": 2.03, "learning_rate": 5.022165299330738e-06, "loss": 0.2956, "step": 3139 }, { "epoch": 2.03, "learning_rate": 5.016116982487811e-06, "loss": 0.3024, "step": 3140 }, { "epoch": 2.03, "learning_rate": 5.010071090416301e-06, "loss": 0.3107, "step": 3141 }, { "epoch": 2.03, "learning_rate": 5.004027626057672e-06, "loss": 0.3008, "step": 3142 }, { "epoch": 2.03, "learning_rate": 4.9979865923522e-06, "loss": 0.317, "step": 3143 }, { "epoch": 2.03, "learning_rate": 4.991947992238997e-06, "loss": 0.3198, "step": 3144 }, { "epoch": 2.03, "learning_rate": 4.985911828655962e-06, "loss": 0.3035, "step": 3145 }, { "epoch": 2.03, "learning_rate": 4.979878104539836e-06, "loss": 0.3157, "step": 3146 }, { "epoch": 2.03, "learning_rate": 4.973846822826158e-06, "loss": 0.3082, "step": 3147 }, { "epoch": 2.03, "learning_rate": 4.967817986449284e-06, "loss": 0.3098, "step": 3148 }, { "epoch": 2.03, "learning_rate": 4.961791598342377e-06, "loss": 0.3035, "step": 3149 }, { "epoch": 2.03, "learning_rate": 4.955767661437412e-06, "loss": 0.2925, "step": 3150 }, { "epoch": 2.03, "learning_rate": 4.949746178665168e-06, "loss": 0.3033, "step": 3151 }, { "epoch": 2.04, "learning_rate": 4.943727152955235e-06, "loss": 0.3196, "step": 3152 }, { "epoch": 2.04, "learning_rate": 4.937710587236001e-06, "loss": 0.3151, "step": 3153 }, { "epoch": 2.04, "learning_rate": 4.9316964844346635e-06, "loss": 0.3188, "step": 3154 }, { "epoch": 2.04, "learning_rate": 4.925684847477216e-06, "loss": 0.313, "step": 3155 }, { "epoch": 2.04, "learning_rate": 4.9196756792884605e-06, "loss": 0.3094, "step": 3156 }, { "epoch": 2.04, "learning_rate": 4.913668982791989e-06, "loss": 0.2767, "step": 3157 }, { "epoch": 2.04, "learning_rate": 4.907664760910197e-06, "loss": 0.2843, "step": 3158 }, { "epoch": 2.04, "learning_rate": 4.901663016564273e-06, "loss": 0.2943, "step": 3159 }, { "epoch": 2.04, "learning_rate": 4.8956637526742e-06, "loss": 0.3048, "step": 3160 }, { "epoch": 2.04, "learning_rate": 4.889666972158755e-06, "loss": 0.3107, "step": 3161 }, { "epoch": 2.04, "learning_rate": 4.883672677935508e-06, "loss": 0.2947, "step": 3162 }, { "epoch": 2.04, "learning_rate": 4.877680872920818e-06, "loss": 0.3113, "step": 3163 }, { "epoch": 2.04, "learning_rate": 4.871691560029828e-06, "loss": 0.3039, "step": 3164 }, { "epoch": 2.04, "learning_rate": 4.865704742176487e-06, "loss": 0.3293, "step": 3165 }, { "epoch": 2.04, "learning_rate": 4.859720422273501e-06, "loss": 0.3066, "step": 3166 }, { "epoch": 2.04, "learning_rate": 4.8537386032323904e-06, "loss": 0.3013, "step": 3167 }, { "epoch": 2.05, "learning_rate": 4.847759287963432e-06, "loss": 0.2861, "step": 3168 }, { "epoch": 2.05, "learning_rate": 4.841782479375707e-06, "loss": 0.3088, "step": 3169 }, { "epoch": 2.05, "learning_rate": 4.835808180377065e-06, "loss": 0.3134, "step": 3170 }, { "epoch": 2.05, "learning_rate": 4.8298363938741356e-06, "loss": 0.3235, "step": 3171 }, { "epoch": 2.05, "learning_rate": 4.8238671227723285e-06, "loss": 0.2917, "step": 3172 }, { "epoch": 2.05, "learning_rate": 4.817900369975826e-06, "loss": 0.2976, "step": 3173 }, { "epoch": 2.05, "learning_rate": 4.811936138387597e-06, "loss": 0.3102, "step": 3174 }, { "epoch": 2.05, "learning_rate": 4.805974430909361e-06, "loss": 0.3161, "step": 3175 }, { "epoch": 2.05, "learning_rate": 4.800015250441638e-06, "loss": 0.3233, "step": 3176 }, { "epoch": 2.05, "learning_rate": 4.794058599883687e-06, "loss": 0.3019, "step": 3177 }, { "epoch": 2.05, "learning_rate": 4.78810448213357e-06, "loss": 0.3086, "step": 3178 }, { "epoch": 2.05, "learning_rate": 4.7821529000880825e-06, "loss": 0.3018, "step": 3179 }, { "epoch": 2.05, "learning_rate": 4.7762038566428155e-06, "loss": 0.3094, "step": 3180 }, { "epoch": 2.05, "learning_rate": 4.77025735469211e-06, "loss": 0.3098, "step": 3181 }, { "epoch": 2.05, "learning_rate": 4.764313397129066e-06, "loss": 0.2966, "step": 3182 }, { "epoch": 2.06, "learning_rate": 4.758371986845569e-06, "loss": 0.3298, "step": 3183 }, { "epoch": 2.06, "learning_rate": 4.752433126732231e-06, "loss": 0.3146, "step": 3184 }, { "epoch": 2.06, "learning_rate": 4.746496819678455e-06, "loss": 0.3002, "step": 3185 }, { "epoch": 2.06, "learning_rate": 4.740563068572377e-06, "loss": 0.2893, "step": 3186 }, { "epoch": 2.06, "learning_rate": 4.734631876300914e-06, "loss": 0.3073, "step": 3187 }, { "epoch": 2.06, "learning_rate": 4.728703245749707e-06, "loss": 0.3075, "step": 3188 }, { "epoch": 2.06, "learning_rate": 4.722777179803184e-06, "loss": 0.2905, "step": 3189 }, { "epoch": 2.06, "learning_rate": 4.716853681344502e-06, "loss": 0.3105, "step": 3190 }, { "epoch": 2.06, "learning_rate": 4.71093275325558e-06, "loss": 0.2952, "step": 3191 }, { "epoch": 2.06, "learning_rate": 4.7050143984170805e-06, "loss": 0.2964, "step": 3192 }, { "epoch": 2.06, "learning_rate": 4.699098619708414e-06, "loss": 0.3135, "step": 3193 }, { "epoch": 2.06, "learning_rate": 4.693185420007751e-06, "loss": 0.313, "step": 3194 }, { "epoch": 2.06, "learning_rate": 4.6872748021919816e-06, "loss": 0.3113, "step": 3195 }, { "epoch": 2.06, "learning_rate": 4.681366769136769e-06, "loss": 0.2975, "step": 3196 }, { "epoch": 2.06, "learning_rate": 4.675461323716491e-06, "loss": 0.3121, "step": 3197 }, { "epoch": 2.06, "learning_rate": 4.669558468804288e-06, "loss": 0.3049, "step": 3198 }, { "epoch": 2.07, "learning_rate": 4.663658207272032e-06, "loss": 0.3101, "step": 3199 }, { "epoch": 2.07, "learning_rate": 4.657760541990329e-06, "loss": 0.3143, "step": 3200 }, { "epoch": 2.07, "learning_rate": 4.651865475828531e-06, "loss": 0.2975, "step": 3201 }, { "epoch": 2.07, "learning_rate": 4.645973011654717e-06, "loss": 0.3082, "step": 3202 }, { "epoch": 2.07, "learning_rate": 4.640083152335706e-06, "loss": 0.3142, "step": 3203 }, { "epoch": 2.07, "learning_rate": 4.634195900737045e-06, "loss": 0.3028, "step": 3204 }, { "epoch": 2.07, "learning_rate": 4.628311259723015e-06, "loss": 0.3166, "step": 3205 }, { "epoch": 2.07, "learning_rate": 4.622429232156624e-06, "loss": 0.2941, "step": 3206 }, { "epoch": 2.07, "learning_rate": 4.616549820899618e-06, "loss": 0.3249, "step": 3207 }, { "epoch": 2.07, "learning_rate": 4.610673028812459e-06, "loss": 0.2981, "step": 3208 }, { "epoch": 2.07, "learning_rate": 4.604798858754339e-06, "loss": 0.2994, "step": 3209 }, { "epoch": 2.07, "learning_rate": 4.598927313583175e-06, "loss": 0.3334, "step": 3210 }, { "epoch": 2.07, "learning_rate": 4.593058396155604e-06, "loss": 0.3142, "step": 3211 }, { "epoch": 2.07, "learning_rate": 4.587192109326988e-06, "loss": 0.3083, "step": 3212 }, { "epoch": 2.07, "learning_rate": 4.581328455951408e-06, "loss": 0.311, "step": 3213 }, { "epoch": 2.08, "learning_rate": 4.5754674388816625e-06, "loss": 0.3133, "step": 3214 }, { "epoch": 2.08, "learning_rate": 4.569609060969269e-06, "loss": 0.2897, "step": 3215 }, { "epoch": 2.08, "learning_rate": 4.563753325064457e-06, "loss": 0.3083, "step": 3216 }, { "epoch": 2.08, "learning_rate": 4.557900234016185e-06, "loss": 0.2928, "step": 3217 }, { "epoch": 2.08, "learning_rate": 4.552049790672099e-06, "loss": 0.3097, "step": 3218 }, { "epoch": 2.08, "learning_rate": 4.546201997878581e-06, "loss": 0.28, "step": 3219 }, { "epoch": 2.08, "learning_rate": 4.540356858480711e-06, "loss": 0.2992, "step": 3220 }, { "epoch": 2.08, "learning_rate": 4.534514375322283e-06, "loss": 0.2972, "step": 3221 }, { "epoch": 2.08, "learning_rate": 4.528674551245796e-06, "loss": 0.3088, "step": 3222 }, { "epoch": 2.08, "learning_rate": 4.522837389092455e-06, "loss": 0.3168, "step": 3223 }, { "epoch": 2.08, "learning_rate": 4.5170028917021705e-06, "loss": 0.3097, "step": 3224 }, { "epoch": 2.08, "learning_rate": 4.511171061913555e-06, "loss": 0.315, "step": 3225 }, { "epoch": 2.08, "learning_rate": 4.5053419025639355e-06, "loss": 0.3077, "step": 3226 }, { "epoch": 2.08, "learning_rate": 4.499515416489314e-06, "loss": 0.3055, "step": 3227 }, { "epoch": 2.08, "learning_rate": 4.493691606524423e-06, "loss": 0.3139, "step": 3228 }, { "epoch": 2.08, "learning_rate": 4.487870475502661e-06, "loss": 0.3242, "step": 3229 }, { "epoch": 2.09, "learning_rate": 4.482052026256156e-06, "loss": 0.3135, "step": 3230 }, { "epoch": 2.09, "learning_rate": 4.4762362616157e-06, "loss": 0.3012, "step": 3231 }, { "epoch": 2.09, "learning_rate": 4.470423184410804e-06, "loss": 0.3068, "step": 3232 }, { "epoch": 2.09, "learning_rate": 4.464612797469656e-06, "loss": 0.2995, "step": 3233 }, { "epoch": 2.09, "learning_rate": 4.4588051036191395e-06, "loss": 0.3132, "step": 3234 }, { "epoch": 2.09, "learning_rate": 4.453000105684836e-06, "loss": 0.3023, "step": 3235 }, { "epoch": 2.09, "learning_rate": 4.447197806490996e-06, "loss": 0.3096, "step": 3236 }, { "epoch": 2.09, "learning_rate": 4.441398208860582e-06, "loss": 0.3023, "step": 3237 }, { "epoch": 2.09, "learning_rate": 4.4356013156152154e-06, "loss": 0.2979, "step": 3238 }, { "epoch": 2.09, "learning_rate": 4.429807129575229e-06, "loss": 0.3027, "step": 3239 }, { "epoch": 2.09, "learning_rate": 4.424015653559611e-06, "loss": 0.2952, "step": 3240 }, { "epoch": 2.09, "learning_rate": 4.418226890386056e-06, "loss": 0.3131, "step": 3241 }, { "epoch": 2.09, "learning_rate": 4.4124408428709235e-06, "loss": 0.2984, "step": 3242 }, { "epoch": 2.09, "learning_rate": 4.406657513829251e-06, "loss": 0.3005, "step": 3243 }, { "epoch": 2.09, "learning_rate": 4.400876906074772e-06, "loss": 0.3142, "step": 3244 }, { "epoch": 2.1, "learning_rate": 4.395099022419869e-06, "loss": 0.3072, "step": 3245 }, { "epoch": 2.1, "learning_rate": 4.389323865675623e-06, "loss": 0.3035, "step": 3246 }, { "epoch": 2.1, "learning_rate": 4.383551438651765e-06, "loss": 0.298, "step": 3247 }, { "epoch": 2.1, "learning_rate": 4.377781744156727e-06, "loss": 0.3037, "step": 3248 }, { "epoch": 2.1, "learning_rate": 4.372014784997579e-06, "loss": 0.3054, "step": 3249 }, { "epoch": 2.1, "learning_rate": 4.366250563980089e-06, "loss": 0.3212, "step": 3250 }, { "epoch": 2.1, "learning_rate": 4.360489083908675e-06, "loss": 0.3052, "step": 3251 }, { "epoch": 2.1, "learning_rate": 4.35473034758643e-06, "loss": 0.3002, "step": 3252 }, { "epoch": 2.1, "learning_rate": 4.348974357815106e-06, "loss": 0.2952, "step": 3253 }, { "epoch": 2.1, "learning_rate": 4.343221117395122e-06, "loss": 0.3195, "step": 3254 }, { "epoch": 2.1, "learning_rate": 4.3374706291255596e-06, "loss": 0.3109, "step": 3255 }, { "epoch": 2.1, "learning_rate": 4.331722895804158e-06, "loss": 0.3158, "step": 3256 }, { "epoch": 2.1, "learning_rate": 4.32597792022733e-06, "loss": 0.302, "step": 3257 }, { "epoch": 2.1, "learning_rate": 4.32023570519012e-06, "loss": 0.3036, "step": 3258 }, { "epoch": 2.1, "learning_rate": 4.314496253486258e-06, "loss": 0.3252, "step": 3259 }, { "epoch": 2.1, "learning_rate": 4.30875956790811e-06, "loss": 0.3049, "step": 3260 }, { "epoch": 2.11, "learning_rate": 4.303025651246705e-06, "loss": 0.2962, "step": 3261 }, { "epoch": 2.11, "learning_rate": 4.297294506291721e-06, "loss": 0.3244, "step": 3262 }, { "epoch": 2.11, "learning_rate": 4.291566135831492e-06, "loss": 0.326, "step": 3263 }, { "epoch": 2.11, "learning_rate": 4.285840542652997e-06, "loss": 0.3026, "step": 3264 }, { "epoch": 2.11, "learning_rate": 4.280117729541866e-06, "loss": 0.309, "step": 3265 }, { "epoch": 2.11, "learning_rate": 4.274397699282377e-06, "loss": 0.3061, "step": 3266 }, { "epoch": 2.11, "learning_rate": 4.268680454657456e-06, "loss": 0.3085, "step": 3267 }, { "epoch": 2.11, "learning_rate": 4.262965998448665e-06, "loss": 0.3151, "step": 3268 }, { "epoch": 2.11, "learning_rate": 4.257254333436223e-06, "loss": 0.3029, "step": 3269 }, { "epoch": 2.11, "learning_rate": 4.25154546239898e-06, "loss": 0.32, "step": 3270 }, { "epoch": 2.11, "learning_rate": 4.2458393881144335e-06, "loss": 0.2896, "step": 3271 }, { "epoch": 2.11, "learning_rate": 4.240136113358714e-06, "loss": 0.3067, "step": 3272 }, { "epoch": 2.11, "learning_rate": 4.234435640906593e-06, "loss": 0.3204, "step": 3273 }, { "epoch": 2.11, "learning_rate": 4.228737973531481e-06, "loss": 0.3132, "step": 3274 }, { "epoch": 2.11, "learning_rate": 4.2230431140054205e-06, "loss": 0.3137, "step": 3275 }, { "epoch": 2.12, "learning_rate": 4.2173510650990905e-06, "loss": 0.3101, "step": 3276 }, { "epoch": 2.12, "learning_rate": 4.211661829581793e-06, "loss": 0.3078, "step": 3277 }, { "epoch": 2.12, "learning_rate": 4.205975410221486e-06, "loss": 0.3098, "step": 3278 }, { "epoch": 2.12, "learning_rate": 4.200291809784722e-06, "loss": 0.2958, "step": 3279 }, { "epoch": 2.12, "learning_rate": 4.194611031036718e-06, "loss": 0.3107, "step": 3280 }, { "epoch": 2.12, "learning_rate": 4.188933076741284e-06, "loss": 0.3059, "step": 3281 }, { "epoch": 2.12, "learning_rate": 4.183257949660885e-06, "loss": 0.3037, "step": 3282 }, { "epoch": 2.12, "learning_rate": 4.177585652556595e-06, "loss": 0.2777, "step": 3283 }, { "epoch": 2.12, "learning_rate": 4.171916188188113e-06, "loss": 0.2985, "step": 3284 }, { "epoch": 2.12, "learning_rate": 4.166249559313762e-06, "loss": 0.2959, "step": 3285 }, { "epoch": 2.12, "learning_rate": 4.1605857686904806e-06, "loss": 0.3098, "step": 3286 }, { "epoch": 2.12, "learning_rate": 4.154924819073842e-06, "loss": 0.3082, "step": 3287 }, { "epoch": 2.12, "learning_rate": 4.149266713218011e-06, "loss": 0.2915, "step": 3288 }, { "epoch": 2.12, "learning_rate": 4.143611453875798e-06, "loss": 0.2906, "step": 3289 }, { "epoch": 2.12, "learning_rate": 4.1379590437986e-06, "loss": 0.3202, "step": 3290 }, { "epoch": 2.12, "learning_rate": 4.132309485736457e-06, "loss": 0.3019, "step": 3291 }, { "epoch": 2.13, "learning_rate": 4.12666278243799e-06, "loss": 0.3049, "step": 3292 }, { "epoch": 2.13, "learning_rate": 4.1210189366504596e-06, "loss": 0.3046, "step": 3293 }, { "epoch": 2.13, "learning_rate": 4.11537795111972e-06, "loss": 0.2878, "step": 3294 }, { "epoch": 2.13, "learning_rate": 4.1097398285902344e-06, "loss": 0.2948, "step": 3295 }, { "epoch": 2.13, "learning_rate": 4.104104571805088e-06, "loss": 0.2851, "step": 3296 }, { "epoch": 2.13, "learning_rate": 4.098472183505944e-06, "loss": 0.3115, "step": 3297 }, { "epoch": 2.13, "learning_rate": 4.092842666433101e-06, "loss": 0.2991, "step": 3298 }, { "epoch": 2.13, "learning_rate": 4.087216023325433e-06, "loss": 0.3081, "step": 3299 }, { "epoch": 2.13, "learning_rate": 4.0815922569204435e-06, "loss": 0.3274, "step": 3300 }, { "epoch": 2.13, "learning_rate": 4.075971369954205e-06, "loss": 0.3122, "step": 3301 }, { "epoch": 2.13, "learning_rate": 4.070353365161417e-06, "loss": 0.3022, "step": 3302 }, { "epoch": 2.13, "learning_rate": 4.064738245275363e-06, "loss": 0.2886, "step": 3303 }, { "epoch": 2.13, "learning_rate": 4.059126013027924e-06, "loss": 0.3281, "step": 3304 }, { "epoch": 2.13, "learning_rate": 4.053516671149579e-06, "loss": 0.3135, "step": 3305 }, { "epoch": 2.13, "learning_rate": 4.047910222369394e-06, "loss": 0.3061, "step": 3306 }, { "epoch": 2.14, "learning_rate": 4.042306669415046e-06, "loss": 0.2957, "step": 3307 }, { "epoch": 2.14, "learning_rate": 4.0367060150127726e-06, "loss": 0.3186, "step": 3308 }, { "epoch": 2.14, "learning_rate": 4.031108261887434e-06, "loss": 0.3239, "step": 3309 }, { "epoch": 2.14, "learning_rate": 4.02551341276245e-06, "loss": 0.305, "step": 3310 }, { "epoch": 2.14, "learning_rate": 4.019921470359849e-06, "loss": 0.3093, "step": 3311 }, { "epoch": 2.14, "learning_rate": 4.014332437400235e-06, "loss": 0.2962, "step": 3312 }, { "epoch": 2.14, "learning_rate": 4.008746316602799e-06, "loss": 0.3147, "step": 3313 }, { "epoch": 2.14, "learning_rate": 4.0031631106853135e-06, "loss": 0.2879, "step": 3314 }, { "epoch": 2.14, "learning_rate": 3.997582822364135e-06, "loss": 0.3056, "step": 3315 }, { "epoch": 2.14, "learning_rate": 3.9920054543541976e-06, "loss": 0.2967, "step": 3316 }, { "epoch": 2.14, "learning_rate": 3.98643100936902e-06, "loss": 0.2892, "step": 3317 }, { "epoch": 2.14, "learning_rate": 3.980859490120692e-06, "loss": 0.3002, "step": 3318 }, { "epoch": 2.14, "learning_rate": 3.975290899319881e-06, "loss": 0.2962, "step": 3319 }, { "epoch": 2.14, "learning_rate": 3.96972523967584e-06, "loss": 0.3005, "step": 3320 }, { "epoch": 2.14, "learning_rate": 3.964162513896383e-06, "loss": 0.2939, "step": 3321 }, { "epoch": 2.14, "learning_rate": 3.958602724687901e-06, "loss": 0.3039, "step": 3322 }, { "epoch": 2.15, "learning_rate": 3.953045874755359e-06, "loss": 0.3126, "step": 3323 }, { "epoch": 2.15, "learning_rate": 3.947491966802288e-06, "loss": 0.312, "step": 3324 }, { "epoch": 2.15, "learning_rate": 3.941941003530789e-06, "loss": 0.3044, "step": 3325 }, { "epoch": 2.15, "learning_rate": 3.936392987641533e-06, "loss": 0.3039, "step": 3326 }, { "epoch": 2.15, "learning_rate": 3.9308479218337525e-06, "loss": 0.3048, "step": 3327 }, { "epoch": 2.15, "learning_rate": 3.925305808805247e-06, "loss": 0.2925, "step": 3328 }, { "epoch": 2.15, "learning_rate": 3.919766651252378e-06, "loss": 0.2967, "step": 3329 }, { "epoch": 2.15, "learning_rate": 3.914230451870079e-06, "loss": 0.3042, "step": 3330 }, { "epoch": 2.15, "learning_rate": 3.908697213351823e-06, "loss": 0.2893, "step": 3331 }, { "epoch": 2.15, "learning_rate": 3.903166938389664e-06, "loss": 0.2966, "step": 3332 }, { "epoch": 2.15, "learning_rate": 3.897639629674205e-06, "loss": 0.3085, "step": 3333 }, { "epoch": 2.15, "learning_rate": 3.892115289894603e-06, "loss": 0.2912, "step": 3334 }, { "epoch": 2.15, "learning_rate": 3.886593921738575e-06, "loss": 0.3077, "step": 3335 }, { "epoch": 2.15, "learning_rate": 3.881075527892391e-06, "loss": 0.293, "step": 3336 }, { "epoch": 2.15, "learning_rate": 3.875560111040876e-06, "loss": 0.3177, "step": 3337 }, { "epoch": 2.16, "learning_rate": 3.870047673867397e-06, "loss": 0.2937, "step": 3338 }, { "epoch": 2.16, "learning_rate": 3.864538219053894e-06, "loss": 0.3103, "step": 3339 }, { "epoch": 2.16, "learning_rate": 3.859031749280824e-06, "loss": 0.309, "step": 3340 }, { "epoch": 2.16, "learning_rate": 3.853528267227223e-06, "loss": 0.3174, "step": 3341 }, { "epoch": 2.16, "learning_rate": 3.848027775570645e-06, "loss": 0.3352, "step": 3342 }, { "epoch": 2.16, "learning_rate": 3.842530276987215e-06, "loss": 0.3029, "step": 3343 }, { "epoch": 2.16, "learning_rate": 3.837035774151585e-06, "loss": 0.3096, "step": 3344 }, { "epoch": 2.16, "learning_rate": 3.831544269736955e-06, "loss": 0.3197, "step": 3345 }, { "epoch": 2.16, "learning_rate": 3.826055766415067e-06, "loss": 0.2932, "step": 3346 }, { "epoch": 2.16, "learning_rate": 3.820570266856195e-06, "loss": 0.2999, "step": 3347 }, { "epoch": 2.16, "learning_rate": 3.815087773729171e-06, "loss": 0.3052, "step": 3348 }, { "epoch": 2.16, "learning_rate": 3.8096082897013376e-06, "loss": 0.3144, "step": 3349 }, { "epoch": 2.16, "learning_rate": 3.804131817438599e-06, "loss": 0.317, "step": 3350 }, { "epoch": 2.16, "learning_rate": 3.7986583596053715e-06, "loss": 0.2933, "step": 3351 }, { "epoch": 2.16, "learning_rate": 3.793187918864627e-06, "loss": 0.3197, "step": 3352 }, { "epoch": 2.16, "learning_rate": 3.7877204978778458e-06, "loss": 0.2953, "step": 3353 }, { "epoch": 2.17, "learning_rate": 3.7822560993050617e-06, "loss": 0.3049, "step": 3354 }, { "epoch": 2.17, "learning_rate": 3.7767947258048243e-06, "loss": 0.3156, "step": 3355 }, { "epoch": 2.17, "learning_rate": 3.771336380034211e-06, "loss": 0.2956, "step": 3356 }, { "epoch": 2.17, "learning_rate": 3.765881064648841e-06, "loss": 0.2957, "step": 3357 }, { "epoch": 2.17, "learning_rate": 3.760428782302833e-06, "loss": 0.318, "step": 3358 }, { "epoch": 2.17, "learning_rate": 3.7549795356488607e-06, "loss": 0.314, "step": 3359 }, { "epoch": 2.17, "learning_rate": 3.749533327338091e-06, "loss": 0.2998, "step": 3360 }, { "epoch": 2.17, "learning_rate": 3.7440901600202404e-06, "loss": 0.3021, "step": 3361 }, { "epoch": 2.17, "learning_rate": 3.7386500363435196e-06, "loss": 0.3048, "step": 3362 }, { "epoch": 2.17, "learning_rate": 3.733212958954682e-06, "loss": 0.3054, "step": 3363 }, { "epoch": 2.17, "learning_rate": 3.727778930498982e-06, "loss": 0.2865, "step": 3364 }, { "epoch": 2.17, "learning_rate": 3.7223479536202e-06, "loss": 0.3178, "step": 3365 }, { "epoch": 2.17, "learning_rate": 3.716920030960628e-06, "loss": 0.3044, "step": 3366 }, { "epoch": 2.17, "learning_rate": 3.711495165161072e-06, "loss": 0.3184, "step": 3367 }, { "epoch": 2.17, "learning_rate": 3.706073358860851e-06, "loss": 0.3101, "step": 3368 }, { "epoch": 2.18, "learning_rate": 3.700654614697794e-06, "loss": 0.2933, "step": 3369 }, { "epoch": 2.18, "learning_rate": 3.695238935308252e-06, "loss": 0.2898, "step": 3370 }, { "epoch": 2.18, "learning_rate": 3.6898263233270603e-06, "loss": 0.29, "step": 3371 }, { "epoch": 2.18, "learning_rate": 3.6844167813875888e-06, "loss": 0.3156, "step": 3372 }, { "epoch": 2.18, "learning_rate": 3.679010312121697e-06, "loss": 0.3068, "step": 3373 }, { "epoch": 2.18, "learning_rate": 3.673606918159753e-06, "loss": 0.3153, "step": 3374 }, { "epoch": 2.18, "learning_rate": 3.668206602130632e-06, "loss": 0.3026, "step": 3375 }, { "epoch": 2.18, "learning_rate": 3.6628093666617083e-06, "loss": 0.3119, "step": 3376 }, { "epoch": 2.18, "learning_rate": 3.6574152143788578e-06, "loss": 0.3075, "step": 3377 }, { "epoch": 2.18, "learning_rate": 3.652024147906459e-06, "loss": 0.3134, "step": 3378 }, { "epoch": 2.18, "learning_rate": 3.6466361698673856e-06, "loss": 0.3028, "step": 3379 }, { "epoch": 2.18, "learning_rate": 3.6412512828830114e-06, "loss": 0.3087, "step": 3380 }, { "epoch": 2.18, "learning_rate": 3.6358694895732016e-06, "loss": 0.3107, "step": 3381 }, { "epoch": 2.18, "learning_rate": 3.6304907925563273e-06, "loss": 0.3131, "step": 3382 }, { "epoch": 2.18, "learning_rate": 3.62511519444924e-06, "loss": 0.3241, "step": 3383 }, { "epoch": 2.18, "learning_rate": 3.6197426978672923e-06, "loss": 0.3032, "step": 3384 }, { "epoch": 2.19, "learning_rate": 3.6143733054243234e-06, "loss": 0.3276, "step": 3385 }, { "epoch": 2.19, "learning_rate": 3.6090070197326643e-06, "loss": 0.3088, "step": 3386 }, { "epoch": 2.19, "learning_rate": 3.603643843403134e-06, "loss": 0.3097, "step": 3387 }, { "epoch": 2.19, "learning_rate": 3.5982837790450376e-06, "loss": 0.288, "step": 3388 }, { "epoch": 2.19, "learning_rate": 3.592926829266168e-06, "loss": 0.3066, "step": 3389 }, { "epoch": 2.19, "learning_rate": 3.587572996672799e-06, "loss": 0.2931, "step": 3390 }, { "epoch": 2.19, "learning_rate": 3.5822222838697006e-06, "loss": 0.2879, "step": 3391 }, { "epoch": 2.19, "learning_rate": 3.5768746934601007e-06, "loss": 0.2982, "step": 3392 }, { "epoch": 2.19, "learning_rate": 3.5715302280457343e-06, "loss": 0.2998, "step": 3393 }, { "epoch": 2.19, "learning_rate": 3.5661888902268007e-06, "loss": 0.3102, "step": 3394 }, { "epoch": 2.19, "learning_rate": 3.5608506826019797e-06, "loss": 0.3077, "step": 3395 }, { "epoch": 2.19, "learning_rate": 3.5555156077684317e-06, "loss": 0.3046, "step": 3396 }, { "epoch": 2.19, "learning_rate": 3.550183668321788e-06, "loss": 0.2936, "step": 3397 }, { "epoch": 2.19, "learning_rate": 3.54485486685616e-06, "loss": 0.3141, "step": 3398 }, { "epoch": 2.19, "learning_rate": 3.539529205964126e-06, "loss": 0.2999, "step": 3399 }, { "epoch": 2.2, "learning_rate": 3.5342066882367488e-06, "loss": 0.3179, "step": 3400 }, { "epoch": 2.2, "learning_rate": 3.5288873162635407e-06, "loss": 0.2865, "step": 3401 }, { "epoch": 2.2, "learning_rate": 3.5235710926325093e-06, "loss": 0.3005, "step": 3402 }, { "epoch": 2.2, "learning_rate": 3.518258019930103e-06, "loss": 0.2912, "step": 3403 }, { "epoch": 2.2, "learning_rate": 3.5129481007412634e-06, "loss": 0.3005, "step": 3404 }, { "epoch": 2.2, "learning_rate": 3.507641337649381e-06, "loss": 0.3094, "step": 3405 }, { "epoch": 2.2, "learning_rate": 3.502337733236316e-06, "loss": 0.3104, "step": 3406 }, { "epoch": 2.2, "learning_rate": 3.4970372900823914e-06, "loss": 0.302, "step": 3407 }, { "epoch": 2.2, "learning_rate": 3.491740010766389e-06, "loss": 0.3079, "step": 3408 }, { "epoch": 2.2, "learning_rate": 3.4864458978655657e-06, "loss": 0.3073, "step": 3409 }, { "epoch": 2.2, "learning_rate": 3.481154953955611e-06, "loss": 0.2889, "step": 3410 }, { "epoch": 2.2, "learning_rate": 3.475867181610704e-06, "loss": 0.2985, "step": 3411 }, { "epoch": 2.2, "learning_rate": 3.47058258340345e-06, "loss": 0.3165, "step": 3412 }, { "epoch": 2.2, "learning_rate": 3.465301161904936e-06, "loss": 0.3061, "step": 3413 }, { "epoch": 2.2, "learning_rate": 3.4600229196846903e-06, "loss": 0.2999, "step": 3414 }, { "epoch": 2.2, "learning_rate": 3.454747859310694e-06, "loss": 0.3016, "step": 3415 }, { "epoch": 2.21, "learning_rate": 3.449475983349385e-06, "loss": 0.3379, "step": 3416 }, { "epoch": 2.21, "learning_rate": 3.44420729436565e-06, "loss": 0.2948, "step": 3417 }, { "epoch": 2.21, "learning_rate": 3.438941794922824e-06, "loss": 0.31, "step": 3418 }, { "epoch": 2.21, "learning_rate": 3.4336794875826884e-06, "loss": 0.3119, "step": 3419 }, { "epoch": 2.21, "learning_rate": 3.4284203749054834e-06, "loss": 0.2931, "step": 3420 }, { "epoch": 2.21, "learning_rate": 3.4231644594498724e-06, "loss": 0.299, "step": 3421 }, { "epoch": 2.21, "learning_rate": 3.4179117437729915e-06, "loss": 0.3192, "step": 3422 }, { "epoch": 2.21, "learning_rate": 3.4126622304303914e-06, "loss": 0.3061, "step": 3423 }, { "epoch": 2.21, "learning_rate": 3.4074159219760884e-06, "loss": 0.3013, "step": 3424 }, { "epoch": 2.21, "learning_rate": 3.402172820962526e-06, "loss": 0.3067, "step": 3425 }, { "epoch": 2.21, "learning_rate": 3.396932929940592e-06, "loss": 0.3222, "step": 3426 }, { "epoch": 2.21, "learning_rate": 3.391696251459611e-06, "loss": 0.3149, "step": 3427 }, { "epoch": 2.21, "learning_rate": 3.386462788067344e-06, "loss": 0.3002, "step": 3428 }, { "epoch": 2.21, "learning_rate": 3.381232542309991e-06, "loss": 0.2935, "step": 3429 }, { "epoch": 2.21, "learning_rate": 3.376005516732184e-06, "loss": 0.3162, "step": 3430 }, { "epoch": 2.22, "learning_rate": 3.370781713876987e-06, "loss": 0.2967, "step": 3431 }, { "epoch": 2.22, "learning_rate": 3.3655611362858966e-06, "loss": 0.2712, "step": 3432 }, { "epoch": 2.22, "learning_rate": 3.3603437864988466e-06, "loss": 0.3112, "step": 3433 }, { "epoch": 2.22, "learning_rate": 3.3551296670541923e-06, "loss": 0.2902, "step": 3434 }, { "epoch": 2.22, "learning_rate": 3.3499187804887213e-06, "loss": 0.3089, "step": 3435 }, { "epoch": 2.22, "learning_rate": 3.3447111293376468e-06, "loss": 0.3202, "step": 3436 }, { "epoch": 2.22, "learning_rate": 3.3395067161346073e-06, "loss": 0.3122, "step": 3437 }, { "epoch": 2.22, "learning_rate": 3.334305543411669e-06, "loss": 0.3105, "step": 3438 }, { "epoch": 2.22, "learning_rate": 3.3291076136993185e-06, "loss": 0.3028, "step": 3439 }, { "epoch": 2.22, "learning_rate": 3.323912929526465e-06, "loss": 0.3003, "step": 3440 }, { "epoch": 2.22, "learning_rate": 3.3187214934204413e-06, "loss": 0.3096, "step": 3441 }, { "epoch": 2.22, "learning_rate": 3.3135333079069943e-06, "loss": 0.32, "step": 3442 }, { "epoch": 2.22, "learning_rate": 3.3083483755102996e-06, "loss": 0.3104, "step": 3443 }, { "epoch": 2.22, "learning_rate": 3.30316669875294e-06, "loss": 0.3202, "step": 3444 }, { "epoch": 2.22, "learning_rate": 3.297988280155917e-06, "loss": 0.3141, "step": 3445 }, { "epoch": 2.22, "learning_rate": 3.2928131222386507e-06, "loss": 0.3064, "step": 3446 }, { "epoch": 2.23, "learning_rate": 3.2876412275189697e-06, "loss": 0.309, "step": 3447 }, { "epoch": 2.23, "learning_rate": 3.282472598513119e-06, "loss": 0.2836, "step": 3448 }, { "epoch": 2.23, "learning_rate": 3.2773072377357517e-06, "loss": 0.3046, "step": 3449 }, { "epoch": 2.23, "learning_rate": 3.2721451476999333e-06, "loss": 0.2973, "step": 3450 }, { "epoch": 2.23, "learning_rate": 3.2669863309171325e-06, "loss": 0.2904, "step": 3451 }, { "epoch": 2.23, "learning_rate": 3.2618307898972413e-06, "loss": 0.3095, "step": 3452 }, { "epoch": 2.23, "learning_rate": 3.2566785271485313e-06, "loss": 0.2919, "step": 3453 }, { "epoch": 2.23, "learning_rate": 3.2515295451777105e-06, "loss": 0.2983, "step": 3454 }, { "epoch": 2.23, "learning_rate": 3.246383846489859e-06, "loss": 0.323, "step": 3455 }, { "epoch": 2.23, "learning_rate": 3.2412414335884866e-06, "loss": 0.2961, "step": 3456 }, { "epoch": 2.23, "learning_rate": 3.2361023089754894e-06, "loss": 0.3035, "step": 3457 }, { "epoch": 2.23, "learning_rate": 3.230966475151166e-06, "loss": 0.2977, "step": 3458 }, { "epoch": 2.23, "learning_rate": 3.2258339346142176e-06, "loss": 0.3285, "step": 3459 }, { "epoch": 2.23, "learning_rate": 3.2207046898617365e-06, "loss": 0.3137, "step": 3460 }, { "epoch": 2.23, "learning_rate": 3.215578743389225e-06, "loss": 0.3075, "step": 3461 }, { "epoch": 2.24, "learning_rate": 3.2104560976905596e-06, "loss": 0.2913, "step": 3462 }, { "epoch": 2.24, "learning_rate": 3.205336755258035e-06, "loss": 0.3135, "step": 3463 }, { "epoch": 2.24, "learning_rate": 3.2002207185823155e-06, "loss": 0.3073, "step": 3464 }, { "epoch": 2.24, "learning_rate": 3.195107990152475e-06, "loss": 0.3028, "step": 3465 }, { "epoch": 2.24, "learning_rate": 3.18999857245597e-06, "loss": 0.3049, "step": 3466 }, { "epoch": 2.24, "learning_rate": 3.1848924679786476e-06, "loss": 0.318, "step": 3467 }, { "epoch": 2.24, "learning_rate": 3.1797896792047422e-06, "loss": 0.3012, "step": 3468 }, { "epoch": 2.24, "learning_rate": 3.1746902086168707e-06, "loss": 0.3027, "step": 3469 }, { "epoch": 2.24, "learning_rate": 3.1695940586960527e-06, "loss": 0.2858, "step": 3470 }, { "epoch": 2.24, "learning_rate": 3.164501231921666e-06, "loss": 0.3154, "step": 3471 }, { "epoch": 2.24, "learning_rate": 3.1594117307714977e-06, "loss": 0.302, "step": 3472 }, { "epoch": 2.24, "learning_rate": 3.1543255577216935e-06, "loss": 0.3045, "step": 3473 }, { "epoch": 2.24, "learning_rate": 3.1492427152468e-06, "loss": 0.2917, "step": 3474 }, { "epoch": 2.24, "learning_rate": 3.1441632058197315e-06, "loss": 0.2885, "step": 3475 }, { "epoch": 2.24, "learning_rate": 3.1390870319117838e-06, "loss": 0.2912, "step": 3476 }, { "epoch": 2.24, "learning_rate": 3.1340141959926298e-06, "loss": 0.3217, "step": 3477 }, { "epoch": 2.25, "learning_rate": 3.1289447005303186e-06, "loss": 0.2964, "step": 3478 }, { "epoch": 2.25, "learning_rate": 3.1238785479912747e-06, "loss": 0.3229, "step": 3479 }, { "epoch": 2.25, "learning_rate": 3.118815740840294e-06, "loss": 0.2917, "step": 3480 }, { "epoch": 2.25, "learning_rate": 3.1137562815405475e-06, "loss": 0.3017, "step": 3481 }, { "epoch": 2.25, "learning_rate": 3.1087001725535726e-06, "loss": 0.3275, "step": 3482 }, { "epoch": 2.25, "learning_rate": 3.103647416339287e-06, "loss": 0.3173, "step": 3483 }, { "epoch": 2.25, "learning_rate": 3.098598015355967e-06, "loss": 0.3116, "step": 3484 }, { "epoch": 2.25, "learning_rate": 3.093551972060259e-06, "loss": 0.2971, "step": 3485 }, { "epoch": 2.25, "learning_rate": 3.088509288907179e-06, "loss": 0.3119, "step": 3486 }, { "epoch": 2.25, "learning_rate": 3.0834699683501047e-06, "loss": 0.3091, "step": 3487 }, { "epoch": 2.25, "learning_rate": 3.0784340128407786e-06, "loss": 0.2985, "step": 3488 }, { "epoch": 2.25, "learning_rate": 3.0734014248293077e-06, "loss": 0.2977, "step": 3489 }, { "epoch": 2.25, "learning_rate": 3.0683722067641587e-06, "loss": 0.3262, "step": 3490 }, { "epoch": 2.25, "learning_rate": 3.0633463610921588e-06, "loss": 0.3155, "step": 3491 }, { "epoch": 2.25, "learning_rate": 3.0583238902584976e-06, "loss": 0.3191, "step": 3492 }, { "epoch": 2.26, "learning_rate": 3.0533047967067153e-06, "loss": 0.3024, "step": 3493 }, { "epoch": 2.26, "learning_rate": 3.0482890828787205e-06, "loss": 0.2974, "step": 3494 }, { "epoch": 2.26, "learning_rate": 3.043276751214769e-06, "loss": 0.3092, "step": 3495 }, { "epoch": 2.26, "learning_rate": 3.038267804153472e-06, "loss": 0.3008, "step": 3496 }, { "epoch": 2.26, "learning_rate": 3.033262244131795e-06, "loss": 0.3103, "step": 3497 }, { "epoch": 2.26, "learning_rate": 3.0282600735850564e-06, "loss": 0.3019, "step": 3498 }, { "epoch": 2.26, "learning_rate": 3.023261294946925e-06, "loss": 0.2985, "step": 3499 }, { "epoch": 2.26, "learning_rate": 3.0182659106494195e-06, "loss": 0.2988, "step": 3500 }, { "epoch": 2.26, "learning_rate": 3.0132739231229057e-06, "loss": 0.2975, "step": 3501 }, { "epoch": 2.26, "learning_rate": 3.0082853347960983e-06, "loss": 0.3029, "step": 3502 }, { "epoch": 2.26, "learning_rate": 3.0033001480960567e-06, "loss": 0.3203, "step": 3503 }, { "epoch": 2.26, "learning_rate": 2.998318365448194e-06, "loss": 0.3159, "step": 3504 }, { "epoch": 2.26, "learning_rate": 2.9933399892762483e-06, "loss": 0.3168, "step": 3505 }, { "epoch": 2.26, "learning_rate": 2.9883650220023196e-06, "loss": 0.3139, "step": 3506 }, { "epoch": 2.26, "learning_rate": 2.9833934660468412e-06, "loss": 0.3018, "step": 3507 }, { "epoch": 2.26, "learning_rate": 2.9784253238285844e-06, "loss": 0.2865, "step": 3508 }, { "epoch": 2.27, "learning_rate": 2.9734605977646638e-06, "loss": 0.309, "step": 3509 }, { "epoch": 2.27, "learning_rate": 2.9684992902705303e-06, "loss": 0.2851, "step": 3510 }, { "epoch": 2.27, "learning_rate": 2.963541403759972e-06, "loss": 0.3121, "step": 3511 }, { "epoch": 2.27, "learning_rate": 2.9585869406451083e-06, "loss": 0.3022, "step": 3512 }, { "epoch": 2.27, "learning_rate": 2.9536359033364083e-06, "loss": 0.3113, "step": 3513 }, { "epoch": 2.27, "learning_rate": 2.9486882942426486e-06, "loss": 0.3131, "step": 3514 }, { "epoch": 2.27, "learning_rate": 2.9437441157709668e-06, "loss": 0.3085, "step": 3515 }, { "epoch": 2.27, "learning_rate": 2.938803370326804e-06, "loss": 0.304, "step": 3516 }, { "epoch": 2.27, "learning_rate": 2.9338660603139533e-06, "loss": 0.3141, "step": 3517 }, { "epoch": 2.27, "learning_rate": 2.9289321881345257e-06, "loss": 0.3231, "step": 3518 }, { "epoch": 2.27, "learning_rate": 2.924001756188959e-06, "loss": 0.3034, "step": 3519 }, { "epoch": 2.27, "learning_rate": 2.9190747668760213e-06, "loss": 0.2911, "step": 3520 }, { "epoch": 2.27, "learning_rate": 2.9141512225927993e-06, "loss": 0.301, "step": 3521 }, { "epoch": 2.27, "learning_rate": 2.9092311257347182e-06, "loss": 0.3219, "step": 3522 }, { "epoch": 2.27, "learning_rate": 2.9043144786955048e-06, "loss": 0.3056, "step": 3523 }, { "epoch": 2.28, "learning_rate": 2.899401283867229e-06, "loss": 0.3245, "step": 3524 }, { "epoch": 2.28, "learning_rate": 2.89449154364026e-06, "loss": 0.3058, "step": 3525 }, { "epoch": 2.28, "learning_rate": 2.8895852604033047e-06, "loss": 0.2909, "step": 3526 }, { "epoch": 2.28, "learning_rate": 2.88468243654338e-06, "loss": 0.286, "step": 3527 }, { "epoch": 2.28, "learning_rate": 2.8797830744458177e-06, "loss": 0.3009, "step": 3528 }, { "epoch": 2.28, "learning_rate": 2.87488717649427e-06, "loss": 0.3004, "step": 3529 }, { "epoch": 2.28, "learning_rate": 2.869994745070699e-06, "loss": 0.3169, "step": 3530 }, { "epoch": 2.28, "learning_rate": 2.865105782555385e-06, "loss": 0.2829, "step": 3531 }, { "epoch": 2.28, "learning_rate": 2.860220291326915e-06, "loss": 0.2981, "step": 3532 }, { "epoch": 2.28, "learning_rate": 2.8553382737621995e-06, "loss": 0.3047, "step": 3533 }, { "epoch": 2.28, "learning_rate": 2.850459732236438e-06, "loss": 0.3002, "step": 3534 }, { "epoch": 2.28, "learning_rate": 2.845584669123159e-06, "loss": 0.3134, "step": 3535 }, { "epoch": 2.28, "learning_rate": 2.840713086794189e-06, "loss": 0.2833, "step": 3536 }, { "epoch": 2.28, "learning_rate": 2.8358449876196625e-06, "loss": 0.3033, "step": 3537 }, { "epoch": 2.28, "learning_rate": 2.8309803739680184e-06, "loss": 0.3051, "step": 3538 }, { "epoch": 2.28, "learning_rate": 2.8261192482060017e-06, "loss": 0.314, "step": 3539 }, { "epoch": 2.29, "learning_rate": 2.8212616126986604e-06, "loss": 0.3079, "step": 3540 }, { "epoch": 2.29, "learning_rate": 2.8164074698093414e-06, "loss": 0.302, "step": 3541 }, { "epoch": 2.29, "learning_rate": 2.811556821899696e-06, "loss": 0.2906, "step": 3542 }, { "epoch": 2.29, "learning_rate": 2.8067096713296747e-06, "loss": 0.3033, "step": 3543 }, { "epoch": 2.29, "learning_rate": 2.801866020457521e-06, "loss": 0.2993, "step": 3544 }, { "epoch": 2.29, "learning_rate": 2.7970258716397856e-06, "loss": 0.3019, "step": 3545 }, { "epoch": 2.29, "learning_rate": 2.7921892272313087e-06, "loss": 0.2959, "step": 3546 }, { "epoch": 2.29, "learning_rate": 2.787356089585227e-06, "loss": 0.3064, "step": 3547 }, { "epoch": 2.29, "learning_rate": 2.7825264610529703e-06, "loss": 0.3255, "step": 3548 }, { "epoch": 2.29, "learning_rate": 2.777700343984261e-06, "loss": 0.3053, "step": 3549 }, { "epoch": 2.29, "learning_rate": 2.7728777407271133e-06, "loss": 0.3094, "step": 3550 }, { "epoch": 2.29, "learning_rate": 2.768058653627834e-06, "loss": 0.3215, "step": 3551 }, { "epoch": 2.29, "learning_rate": 2.7632430850310175e-06, "loss": 0.3044, "step": 3552 }, { "epoch": 2.29, "learning_rate": 2.7584310372795462e-06, "loss": 0.3101, "step": 3553 }, { "epoch": 2.29, "learning_rate": 2.7536225127145897e-06, "loss": 0.3249, "step": 3554 }, { "epoch": 2.3, "learning_rate": 2.7488175136756e-06, "loss": 0.3059, "step": 3555 }, { "epoch": 2.3, "learning_rate": 2.7440160425003236e-06, "loss": 0.3059, "step": 3556 }, { "epoch": 2.3, "learning_rate": 2.739218101524782e-06, "loss": 0.3126, "step": 3557 }, { "epoch": 2.3, "learning_rate": 2.7344236930832814e-06, "loss": 0.3044, "step": 3558 }, { "epoch": 2.3, "learning_rate": 2.729632819508409e-06, "loss": 0.3049, "step": 3559 }, { "epoch": 2.3, "learning_rate": 2.7248454831310335e-06, "loss": 0.3244, "step": 3560 }, { "epoch": 2.3, "learning_rate": 2.720061686280302e-06, "loss": 0.2997, "step": 3561 }, { "epoch": 2.3, "learning_rate": 2.7152814312836397e-06, "loss": 0.3023, "step": 3562 }, { "epoch": 2.3, "learning_rate": 2.710504720466747e-06, "loss": 0.3002, "step": 3563 }, { "epoch": 2.3, "learning_rate": 2.7057315561536e-06, "loss": 0.2978, "step": 3564 }, { "epoch": 2.3, "learning_rate": 2.7009619406664587e-06, "loss": 0.2905, "step": 3565 }, { "epoch": 2.3, "learning_rate": 2.6961958763258366e-06, "loss": 0.2964, "step": 3566 }, { "epoch": 2.3, "learning_rate": 2.6914333654505444e-06, "loss": 0.3022, "step": 3567 }, { "epoch": 2.3, "learning_rate": 2.686674410357637e-06, "loss": 0.3042, "step": 3568 }, { "epoch": 2.3, "learning_rate": 2.681919013362464e-06, "loss": 0.2877, "step": 3569 }, { "epoch": 2.3, "learning_rate": 2.67716717677863e-06, "loss": 0.2892, "step": 3570 }, { "epoch": 2.31, "learning_rate": 2.6724189029180103e-06, "loss": 0.3109, "step": 3571 }, { "epoch": 2.31, "learning_rate": 2.667674194090748e-06, "loss": 0.3116, "step": 3572 }, { "epoch": 2.31, "learning_rate": 2.6629330526052465e-06, "loss": 0.2959, "step": 3573 }, { "epoch": 2.31, "learning_rate": 2.6581954807681877e-06, "loss": 0.2887, "step": 3574 }, { "epoch": 2.31, "learning_rate": 2.653461480884494e-06, "loss": 0.2944, "step": 3575 }, { "epoch": 2.31, "learning_rate": 2.6487310552573776e-06, "loss": 0.3099, "step": 3576 }, { "epoch": 2.31, "learning_rate": 2.644004206188282e-06, "loss": 0.2974, "step": 3577 }, { "epoch": 2.31, "learning_rate": 2.639280935976938e-06, "loss": 0.3256, "step": 3578 }, { "epoch": 2.31, "learning_rate": 2.634561246921317e-06, "loss": 0.2867, "step": 3579 }, { "epoch": 2.31, "learning_rate": 2.6298451413176564e-06, "loss": 0.2859, "step": 3580 }, { "epoch": 2.31, "learning_rate": 2.625132621460447e-06, "loss": 0.2907, "step": 3581 }, { "epoch": 2.31, "learning_rate": 2.620423689642432e-06, "loss": 0.3027, "step": 3582 }, { "epoch": 2.31, "learning_rate": 2.615718348154623e-06, "loss": 0.3079, "step": 3583 }, { "epoch": 2.31, "learning_rate": 2.6110165992862635e-06, "loss": 0.3114, "step": 3584 }, { "epoch": 2.31, "learning_rate": 2.6063184453248704e-06, "loss": 0.2922, "step": 3585 }, { "epoch": 2.32, "learning_rate": 2.6016238885561907e-06, "loss": 0.2917, "step": 3586 }, { "epoch": 2.32, "learning_rate": 2.596932931264241e-06, "loss": 0.3341, "step": 3587 }, { "epoch": 2.32, "learning_rate": 2.592245575731274e-06, "loss": 0.3116, "step": 3588 }, { "epoch": 2.32, "learning_rate": 2.5875618242377944e-06, "loss": 0.3036, "step": 3589 }, { "epoch": 2.32, "learning_rate": 2.582881679062552e-06, "loss": 0.2907, "step": 3590 }, { "epoch": 2.32, "learning_rate": 2.578205142482545e-06, "loss": 0.2815, "step": 3591 }, { "epoch": 2.32, "learning_rate": 2.5735322167730116e-06, "loss": 0.3031, "step": 3592 }, { "epoch": 2.32, "learning_rate": 2.5688629042074377e-06, "loss": 0.3024, "step": 3593 }, { "epoch": 2.32, "learning_rate": 2.5641972070575462e-06, "loss": 0.3043, "step": 3594 }, { "epoch": 2.32, "learning_rate": 2.5595351275933032e-06, "loss": 0.2905, "step": 3595 }, { "epoch": 2.32, "learning_rate": 2.5548766680829207e-06, "loss": 0.2833, "step": 3596 }, { "epoch": 2.32, "learning_rate": 2.550221830792842e-06, "loss": 0.3166, "step": 3597 }, { "epoch": 2.32, "learning_rate": 2.5455706179877483e-06, "loss": 0.2884, "step": 3598 }, { "epoch": 2.32, "learning_rate": 2.5409230319305613e-06, "loss": 0.3072, "step": 3599 }, { "epoch": 2.32, "learning_rate": 2.5362790748824363e-06, "loss": 0.294, "step": 3600 }, { "epoch": 2.32, "learning_rate": 2.5316387491027615e-06, "loss": 0.2915, "step": 3601 }, { "epoch": 2.33, "learning_rate": 2.5270020568491628e-06, "loss": 0.3008, "step": 3602 }, { "epoch": 2.33, "learning_rate": 2.5223690003774925e-06, "loss": 0.302, "step": 3603 }, { "epoch": 2.33, "learning_rate": 2.517739581941839e-06, "loss": 0.2956, "step": 3604 }, { "epoch": 2.33, "learning_rate": 2.5131138037945147e-06, "loss": 0.2962, "step": 3605 }, { "epoch": 2.33, "learning_rate": 2.5084916681860716e-06, "loss": 0.3132, "step": 3606 }, { "epoch": 2.33, "learning_rate": 2.503873177365279e-06, "loss": 0.3201, "step": 3607 }, { "epoch": 2.33, "learning_rate": 2.4992583335791375e-06, "loss": 0.3106, "step": 3608 }, { "epoch": 2.33, "learning_rate": 2.4946471390728734e-06, "loss": 0.301, "step": 3609 }, { "epoch": 2.33, "learning_rate": 2.490039596089935e-06, "loss": 0.3005, "step": 3610 }, { "epoch": 2.33, "learning_rate": 2.485435706871996e-06, "loss": 0.287, "step": 3611 }, { "epoch": 2.33, "learning_rate": 2.4808354736589525e-06, "loss": 0.3239, "step": 3612 }, { "epoch": 2.33, "learning_rate": 2.4762388986889228e-06, "loss": 0.3004, "step": 3613 }, { "epoch": 2.33, "learning_rate": 2.471645984198241e-06, "loss": 0.2954, "step": 3614 }, { "epoch": 2.33, "learning_rate": 2.467056732421471e-06, "loss": 0.2976, "step": 3615 }, { "epoch": 2.33, "learning_rate": 2.4624711455913764e-06, "loss": 0.2856, "step": 3616 }, { "epoch": 2.34, "learning_rate": 2.4578892259389597e-06, "loss": 0.2951, "step": 3617 }, { "epoch": 2.34, "learning_rate": 2.453310975693417e-06, "loss": 0.2989, "step": 3618 }, { "epoch": 2.34, "learning_rate": 2.448736397082179e-06, "loss": 0.3075, "step": 3619 }, { "epoch": 2.34, "learning_rate": 2.444165492330879e-06, "loss": 0.3065, "step": 3620 }, { "epoch": 2.34, "learning_rate": 2.4395982636633632e-06, "loss": 0.3112, "step": 3621 }, { "epoch": 2.34, "learning_rate": 2.435034713301693e-06, "loss": 0.2952, "step": 3622 }, { "epoch": 2.34, "learning_rate": 2.4304748434661372e-06, "loss": 0.2963, "step": 3623 }, { "epoch": 2.34, "learning_rate": 2.425918656375177e-06, "loss": 0.2883, "step": 3624 }, { "epoch": 2.34, "learning_rate": 2.4213661542454956e-06, "loss": 0.2947, "step": 3625 }, { "epoch": 2.34, "learning_rate": 2.416817339291997e-06, "loss": 0.2976, "step": 3626 }, { "epoch": 2.34, "learning_rate": 2.412272213727772e-06, "loss": 0.2961, "step": 3627 }, { "epoch": 2.34, "learning_rate": 2.4077307797641357e-06, "loss": 0.3059, "step": 3628 }, { "epoch": 2.34, "learning_rate": 2.4031930396105883e-06, "loss": 0.3018, "step": 3629 }, { "epoch": 2.34, "learning_rate": 2.3986589954748505e-06, "loss": 0.2983, "step": 3630 }, { "epoch": 2.34, "learning_rate": 2.3941286495628357e-06, "loss": 0.3206, "step": 3631 }, { "epoch": 2.34, "learning_rate": 2.389602004078657e-06, "loss": 0.307, "step": 3632 }, { "epoch": 2.35, "learning_rate": 2.385079061224631e-06, "loss": 0.2919, "step": 3633 }, { "epoch": 2.35, "learning_rate": 2.380559823201267e-06, "loss": 0.2964, "step": 3634 }, { "epoch": 2.35, "learning_rate": 2.3760442922072867e-06, "loss": 0.312, "step": 3635 }, { "epoch": 2.35, "learning_rate": 2.3715324704395846e-06, "loss": 0.3129, "step": 3636 }, { "epoch": 2.35, "learning_rate": 2.3670243600932763e-06, "loss": 0.3012, "step": 3637 }, { "epoch": 2.35, "learning_rate": 2.362519963361647e-06, "loss": 0.3035, "step": 3638 }, { "epoch": 2.35, "learning_rate": 2.3580192824361957e-06, "loss": 0.3072, "step": 3639 }, { "epoch": 2.35, "learning_rate": 2.3535223195066025e-06, "loss": 0.2791, "step": 3640 }, { "epoch": 2.35, "learning_rate": 2.3490290767607414e-06, "loss": 0.2798, "step": 3641 }, { "epoch": 2.35, "learning_rate": 2.3445395563846763e-06, "loss": 0.2979, "step": 3642 }, { "epoch": 2.35, "learning_rate": 2.3400537605626592e-06, "loss": 0.3126, "step": 3643 }, { "epoch": 2.35, "learning_rate": 2.335571691477132e-06, "loss": 0.2904, "step": 3644 }, { "epoch": 2.35, "learning_rate": 2.33109335130872e-06, "loss": 0.3151, "step": 3645 }, { "epoch": 2.35, "learning_rate": 2.326618742236242e-06, "loss": 0.3038, "step": 3646 }, { "epoch": 2.35, "learning_rate": 2.3221478664366882e-06, "loss": 0.3024, "step": 3647 }, { "epoch": 2.36, "learning_rate": 2.3176807260852475e-06, "loss": 0.315, "step": 3648 }, { "epoch": 2.36, "learning_rate": 2.313217323355281e-06, "loss": 0.3148, "step": 3649 }, { "epoch": 2.36, "learning_rate": 2.3087576604183373e-06, "loss": 0.3149, "step": 3650 }, { "epoch": 2.36, "learning_rate": 2.3043017394441394e-06, "loss": 0.3155, "step": 3651 }, { "epoch": 2.36, "learning_rate": 2.2998495626005955e-06, "loss": 0.3114, "step": 3652 }, { "epoch": 2.36, "learning_rate": 2.2954011320537895e-06, "loss": 0.305, "step": 3653 }, { "epoch": 2.36, "learning_rate": 2.2909564499679825e-06, "loss": 0.3056, "step": 3654 }, { "epoch": 2.36, "learning_rate": 2.286515518505614e-06, "loss": 0.3117, "step": 3655 }, { "epoch": 2.36, "learning_rate": 2.282078339827293e-06, "loss": 0.3053, "step": 3656 }, { "epoch": 2.36, "learning_rate": 2.2776449160918136e-06, "loss": 0.2876, "step": 3657 }, { "epoch": 2.36, "learning_rate": 2.273215249456132e-06, "loss": 0.2967, "step": 3658 }, { "epoch": 2.36, "learning_rate": 2.2687893420753824e-06, "loss": 0.3044, "step": 3659 }, { "epoch": 2.36, "learning_rate": 2.264367196102869e-06, "loss": 0.3041, "step": 3660 }, { "epoch": 2.36, "learning_rate": 2.259948813690064e-06, "loss": 0.3075, "step": 3661 }, { "epoch": 2.36, "learning_rate": 2.25553419698661e-06, "loss": 0.2938, "step": 3662 }, { "epoch": 2.36, "learning_rate": 2.2511233481403173e-06, "loss": 0.2923, "step": 3663 }, { "epoch": 2.37, "learning_rate": 2.2467162692971655e-06, "loss": 0.2995, "step": 3664 }, { "epoch": 2.37, "learning_rate": 2.242312962601295e-06, "loss": 0.2901, "step": 3665 }, { "epoch": 2.37, "learning_rate": 2.2379134301950133e-06, "loss": 0.3197, "step": 3666 }, { "epoch": 2.37, "learning_rate": 2.233517674218799e-06, "loss": 0.2972, "step": 3667 }, { "epoch": 2.37, "learning_rate": 2.229125696811275e-06, "loss": 0.3051, "step": 3668 }, { "epoch": 2.37, "learning_rate": 2.2247375001092475e-06, "loss": 0.3149, "step": 3669 }, { "epoch": 2.37, "learning_rate": 2.2203530862476684e-06, "loss": 0.3027, "step": 3670 }, { "epoch": 2.37, "learning_rate": 2.2159724573596552e-06, "loss": 0.305, "step": 3671 }, { "epoch": 2.37, "learning_rate": 2.2115956155764817e-06, "loss": 0.2982, "step": 3672 }, { "epoch": 2.37, "learning_rate": 2.2072225630275803e-06, "loss": 0.3109, "step": 3673 }, { "epoch": 2.37, "learning_rate": 2.2028533018405396e-06, "loss": 0.3181, "step": 3674 }, { "epoch": 2.37, "learning_rate": 2.198487834141102e-06, "loss": 0.2949, "step": 3675 }, { "epoch": 2.37, "learning_rate": 2.1941261620531718e-06, "loss": 0.2852, "step": 3676 }, { "epoch": 2.37, "learning_rate": 2.1897682876987925e-06, "loss": 0.3021, "step": 3677 }, { "epoch": 2.37, "learning_rate": 2.185414213198177e-06, "loss": 0.3116, "step": 3678 }, { "epoch": 2.38, "learning_rate": 2.18106394066967e-06, "loss": 0.2962, "step": 3679 }, { "epoch": 2.38, "learning_rate": 2.17671747222979e-06, "loss": 0.3161, "step": 3680 }, { "epoch": 2.38, "learning_rate": 2.1723748099931785e-06, "loss": 0.2981, "step": 3681 }, { "epoch": 2.38, "learning_rate": 2.168035956072648e-06, "loss": 0.2937, "step": 3682 }, { "epoch": 2.38, "learning_rate": 2.1637009125791466e-06, "loss": 0.3018, "step": 3683 }, { "epoch": 2.38, "learning_rate": 2.1593696816217667e-06, "loss": 0.2865, "step": 3684 }, { "epoch": 2.38, "learning_rate": 2.1550422653077584e-06, "loss": 0.3018, "step": 3685 }, { "epoch": 2.38, "learning_rate": 2.1507186657424962e-06, "loss": 0.2958, "step": 3686 }, { "epoch": 2.38, "learning_rate": 2.1463988850295204e-06, "loss": 0.3072, "step": 3687 }, { "epoch": 2.38, "learning_rate": 2.142082925270489e-06, "loss": 0.3163, "step": 3688 }, { "epoch": 2.38, "learning_rate": 2.1377707885652264e-06, "loss": 0.3122, "step": 3689 }, { "epoch": 2.38, "learning_rate": 2.133462477011673e-06, "loss": 0.3034, "step": 3690 }, { "epoch": 2.38, "learning_rate": 2.129157992705927e-06, "loss": 0.2875, "step": 3691 }, { "epoch": 2.38, "learning_rate": 2.1248573377422155e-06, "loss": 0.3007, "step": 3692 }, { "epoch": 2.38, "learning_rate": 2.1205605142129028e-06, "loss": 0.2944, "step": 3693 }, { "epoch": 2.38, "learning_rate": 2.1162675242084907e-06, "loss": 0.3055, "step": 3694 }, { "epoch": 2.39, "learning_rate": 2.1119783698176134e-06, "loss": 0.3121, "step": 3695 }, { "epoch": 2.39, "learning_rate": 2.107693053127049e-06, "loss": 0.3012, "step": 3696 }, { "epoch": 2.39, "learning_rate": 2.10341157622169e-06, "loss": 0.2891, "step": 3697 }, { "epoch": 2.39, "learning_rate": 2.099133941184582e-06, "loss": 0.2984, "step": 3698 }, { "epoch": 2.39, "learning_rate": 2.09486015009688e-06, "loss": 0.2763, "step": 3699 }, { "epoch": 2.39, "learning_rate": 2.090590205037888e-06, "loss": 0.3141, "step": 3700 }, { "epoch": 2.39, "learning_rate": 2.0863241080850273e-06, "loss": 0.2985, "step": 3701 }, { "epoch": 2.39, "learning_rate": 2.082061861313851e-06, "loss": 0.3078, "step": 3702 }, { "epoch": 2.39, "learning_rate": 2.0778034667980375e-06, "loss": 0.2995, "step": 3703 }, { "epoch": 2.39, "learning_rate": 2.0735489266093923e-06, "loss": 0.2983, "step": 3704 }, { "epoch": 2.39, "learning_rate": 2.069298242817844e-06, "loss": 0.287, "step": 3705 }, { "epoch": 2.39, "learning_rate": 2.065051417491446e-06, "loss": 0.2975, "step": 3706 }, { "epoch": 2.39, "learning_rate": 2.06080845269638e-06, "loss": 0.3151, "step": 3707 }, { "epoch": 2.39, "learning_rate": 2.056569350496933e-06, "loss": 0.2975, "step": 3708 }, { "epoch": 2.39, "learning_rate": 2.052334112955534e-06, "loss": 0.2957, "step": 3709 }, { "epoch": 2.4, "learning_rate": 2.048102742132718e-06, "loss": 0.2834, "step": 3710 }, { "epoch": 2.4, "learning_rate": 2.043875240087142e-06, "loss": 0.3016, "step": 3711 }, { "epoch": 2.4, "learning_rate": 2.0396516088755804e-06, "loss": 0.2768, "step": 3712 }, { "epoch": 2.4, "learning_rate": 2.035431850552926e-06, "loss": 0.3111, "step": 3713 }, { "epoch": 2.4, "learning_rate": 2.0312159671721854e-06, "loss": 0.3154, "step": 3714 }, { "epoch": 2.4, "learning_rate": 2.0270039607844815e-06, "loss": 0.3161, "step": 3715 }, { "epoch": 2.4, "learning_rate": 2.0227958334390506e-06, "loss": 0.2854, "step": 3716 }, { "epoch": 2.4, "learning_rate": 2.0185915871832417e-06, "loss": 0.2854, "step": 3717 }, { "epoch": 2.4, "learning_rate": 2.014391224062512e-06, "loss": 0.295, "step": 3718 }, { "epoch": 2.4, "learning_rate": 2.0101947461204375e-06, "loss": 0.3204, "step": 3719 }, { "epoch": 2.4, "learning_rate": 2.0060021553986974e-06, "loss": 0.3016, "step": 3720 }, { "epoch": 2.4, "learning_rate": 2.0018134539370827e-06, "loss": 0.3003, "step": 3721 }, { "epoch": 2.4, "learning_rate": 1.9976286437734893e-06, "loss": 0.2953, "step": 3722 }, { "epoch": 2.4, "learning_rate": 1.9934477269439225e-06, "loss": 0.2942, "step": 3723 }, { "epoch": 2.4, "learning_rate": 1.989270705482492e-06, "loss": 0.3007, "step": 3724 }, { "epoch": 2.4, "learning_rate": 1.9850975814214144e-06, "loss": 0.3376, "step": 3725 }, { "epoch": 2.41, "learning_rate": 1.9809283567910063e-06, "loss": 0.3006, "step": 3726 }, { "epoch": 2.41, "learning_rate": 1.9767630336196896e-06, "loss": 0.3172, "step": 3727 }, { "epoch": 2.41, "learning_rate": 1.9726016139339934e-06, "loss": 0.3026, "step": 3728 }, { "epoch": 2.41, "learning_rate": 1.968444099758532e-06, "loss": 0.294, "step": 3729 }, { "epoch": 2.41, "learning_rate": 1.964290493116041e-06, "loss": 0.2954, "step": 3730 }, { "epoch": 2.41, "learning_rate": 1.960140796027333e-06, "loss": 0.3125, "step": 3731 }, { "epoch": 2.41, "learning_rate": 1.955995010511338e-06, "loss": 0.3195, "step": 3732 }, { "epoch": 2.41, "learning_rate": 1.9518531385850693e-06, "loss": 0.2945, "step": 3733 }, { "epoch": 2.41, "learning_rate": 1.9477151822636418e-06, "loss": 0.3083, "step": 3734 }, { "epoch": 2.41, "learning_rate": 1.943581143560266e-06, "loss": 0.307, "step": 3735 }, { "epoch": 2.41, "learning_rate": 1.9394510244862397e-06, "loss": 0.308, "step": 3736 }, { "epoch": 2.41, "learning_rate": 1.9353248270509697e-06, "loss": 0.2883, "step": 3737 }, { "epoch": 2.41, "learning_rate": 1.9312025532619306e-06, "loss": 0.3043, "step": 3738 }, { "epoch": 2.41, "learning_rate": 1.927084205124714e-06, "loss": 0.3057, "step": 3739 }, { "epoch": 2.41, "learning_rate": 1.9229697846429773e-06, "loss": 0.2897, "step": 3740 }, { "epoch": 2.42, "learning_rate": 1.9188592938184913e-06, "loss": 0.289, "step": 3741 }, { "epoch": 2.42, "learning_rate": 1.91475273465109e-06, "loss": 0.2858, "step": 3742 }, { "epoch": 2.42, "learning_rate": 1.910650109138715e-06, "loss": 0.3068, "step": 3743 }, { "epoch": 2.42, "learning_rate": 1.9065514192773848e-06, "loss": 0.3024, "step": 3744 }, { "epoch": 2.42, "learning_rate": 1.9024566670611999e-06, "loss": 0.3065, "step": 3745 }, { "epoch": 2.42, "learning_rate": 1.8983658544823591e-06, "loss": 0.3049, "step": 3746 }, { "epoch": 2.42, "learning_rate": 1.8942789835311237e-06, "loss": 0.2984, "step": 3747 }, { "epoch": 2.42, "learning_rate": 1.8901960561958588e-06, "loss": 0.3224, "step": 3748 }, { "epoch": 2.42, "learning_rate": 1.8861170744629909e-06, "loss": 0.3019, "step": 3749 }, { "epoch": 2.42, "learning_rate": 1.8820420403170459e-06, "loss": 0.2905, "step": 3750 }, { "epoch": 2.42, "learning_rate": 1.87797095574061e-06, "loss": 0.3059, "step": 3751 }, { "epoch": 2.42, "learning_rate": 1.8739038227143658e-06, "loss": 0.2803, "step": 3752 }, { "epoch": 2.42, "learning_rate": 1.8698406432170613e-06, "loss": 0.3137, "step": 3753 }, { "epoch": 2.42, "learning_rate": 1.8657814192255264e-06, "loss": 0.2959, "step": 3754 }, { "epoch": 2.42, "learning_rate": 1.8617261527146647e-06, "loss": 0.3073, "step": 3755 }, { "epoch": 2.42, "learning_rate": 1.8576748456574512e-06, "loss": 0.2985, "step": 3756 }, { "epoch": 2.43, "learning_rate": 1.853627500024946e-06, "loss": 0.3057, "step": 3757 }, { "epoch": 2.43, "learning_rate": 1.849584117786265e-06, "loss": 0.2919, "step": 3758 }, { "epoch": 2.43, "learning_rate": 1.8455447009086136e-06, "loss": 0.3229, "step": 3759 }, { "epoch": 2.43, "learning_rate": 1.8415092513572498e-06, "loss": 0.3053, "step": 3760 }, { "epoch": 2.43, "learning_rate": 1.8374777710955183e-06, "loss": 0.3152, "step": 3761 }, { "epoch": 2.43, "learning_rate": 1.8334502620848216e-06, "loss": 0.293, "step": 3762 }, { "epoch": 2.43, "learning_rate": 1.8294267262846354e-06, "loss": 0.2962, "step": 3763 }, { "epoch": 2.43, "learning_rate": 1.8254071656524997e-06, "loss": 0.2921, "step": 3764 }, { "epoch": 2.43, "learning_rate": 1.8213915821440197e-06, "loss": 0.3032, "step": 3765 }, { "epoch": 2.43, "learning_rate": 1.8173799777128698e-06, "loss": 0.2949, "step": 3766 }, { "epoch": 2.43, "learning_rate": 1.813372354310785e-06, "loss": 0.3167, "step": 3767 }, { "epoch": 2.43, "learning_rate": 1.8093687138875648e-06, "loss": 0.2931, "step": 3768 }, { "epoch": 2.43, "learning_rate": 1.8053690583910665e-06, "loss": 0.2812, "step": 3769 }, { "epoch": 2.43, "learning_rate": 1.8013733897672193e-06, "loss": 0.3056, "step": 3770 }, { "epoch": 2.43, "learning_rate": 1.7973817099600032e-06, "loss": 0.3033, "step": 3771 }, { "epoch": 2.44, "learning_rate": 1.7933940209114597e-06, "loss": 0.2833, "step": 3772 }, { "epoch": 2.44, "learning_rate": 1.7894103245616901e-06, "loss": 0.3078, "step": 3773 }, { "epoch": 2.44, "learning_rate": 1.7854306228488515e-06, "loss": 0.3076, "step": 3774 }, { "epoch": 2.44, "learning_rate": 1.78145491770916e-06, "loss": 0.2971, "step": 3775 }, { "epoch": 2.44, "learning_rate": 1.7774832110768847e-06, "loss": 0.2963, "step": 3776 }, { "epoch": 2.44, "learning_rate": 1.773515504884351e-06, "loss": 0.3172, "step": 3777 }, { "epoch": 2.44, "learning_rate": 1.7695518010619373e-06, "loss": 0.3131, "step": 3778 }, { "epoch": 2.44, "learning_rate": 1.7655921015380706e-06, "loss": 0.2731, "step": 3779 }, { "epoch": 2.44, "learning_rate": 1.7616364082392446e-06, "loss": 0.3, "step": 3780 }, { "epoch": 2.44, "learning_rate": 1.7576847230899808e-06, "loss": 0.3098, "step": 3781 }, { "epoch": 2.44, "learning_rate": 1.7537370480128713e-06, "loss": 0.3151, "step": 3782 }, { "epoch": 2.44, "learning_rate": 1.7497933849285464e-06, "loss": 0.2956, "step": 3783 }, { "epoch": 2.44, "learning_rate": 1.745853735755687e-06, "loss": 0.3162, "step": 3784 }, { "epoch": 2.44, "learning_rate": 1.7419181024110221e-06, "loss": 0.3057, "step": 3785 }, { "epoch": 2.44, "learning_rate": 1.7379864868093254e-06, "loss": 0.2941, "step": 3786 }, { "epoch": 2.45, "learning_rate": 1.734058890863416e-06, "loss": 0.3063, "step": 3787 }, { "epoch": 2.45, "learning_rate": 1.7301353164841562e-06, "loss": 0.2784, "step": 3788 }, { "epoch": 2.45, "learning_rate": 1.72621576558046e-06, "loss": 0.3118, "step": 3789 }, { "epoch": 2.45, "learning_rate": 1.7223002400592693e-06, "loss": 0.3128, "step": 3790 }, { "epoch": 2.45, "learning_rate": 1.718388741825583e-06, "loss": 0.3051, "step": 3791 }, { "epoch": 2.45, "learning_rate": 1.7144812727824233e-06, "loss": 0.3014, "step": 3792 }, { "epoch": 2.45, "learning_rate": 1.7105778348308733e-06, "loss": 0.2964, "step": 3793 }, { "epoch": 2.45, "learning_rate": 1.7066784298700334e-06, "loss": 0.3132, "step": 3794 }, { "epoch": 2.45, "learning_rate": 1.7027830597970596e-06, "loss": 0.3108, "step": 3795 }, { "epoch": 2.45, "learning_rate": 1.6988917265071337e-06, "loss": 0.2956, "step": 3796 }, { "epoch": 2.45, "learning_rate": 1.6950044318934744e-06, "loss": 0.2819, "step": 3797 }, { "epoch": 2.45, "learning_rate": 1.6911211778473468e-06, "loss": 0.3098, "step": 3798 }, { "epoch": 2.45, "learning_rate": 1.687241966258032e-06, "loss": 0.318, "step": 3799 }, { "epoch": 2.45, "learning_rate": 1.6833667990128622e-06, "loss": 0.2897, "step": 3800 }, { "epoch": 2.45, "learning_rate": 1.6794956779971838e-06, "loss": 0.2986, "step": 3801 }, { "epoch": 2.45, "learning_rate": 1.6756286050943959e-06, "loss": 0.295, "step": 3802 }, { "epoch": 2.46, "learning_rate": 1.6717655821859058e-06, "loss": 0.3038, "step": 3803 }, { "epoch": 2.46, "learning_rate": 1.6679066111511677e-06, "loss": 0.3042, "step": 3804 }, { "epoch": 2.46, "learning_rate": 1.6640516938676566e-06, "loss": 0.3135, "step": 3805 }, { "epoch": 2.46, "learning_rate": 1.6602008322108742e-06, "loss": 0.2959, "step": 3806 }, { "epoch": 2.46, "learning_rate": 1.6563540280543588e-06, "loss": 0.3027, "step": 3807 }, { "epoch": 2.46, "learning_rate": 1.6525112832696576e-06, "loss": 0.3033, "step": 3808 }, { "epoch": 2.46, "learning_rate": 1.6486725997263619e-06, "loss": 0.2821, "step": 3809 }, { "epoch": 2.46, "learning_rate": 1.6448379792920677e-06, "loss": 0.2729, "step": 3810 }, { "epoch": 2.46, "learning_rate": 1.6410074238324148e-06, "loss": 0.2964, "step": 3811 }, { "epoch": 2.46, "learning_rate": 1.6371809352110446e-06, "loss": 0.3123, "step": 3812 }, { "epoch": 2.46, "learning_rate": 1.6333585152896381e-06, "loss": 0.3148, "step": 3813 }, { "epoch": 2.46, "learning_rate": 1.6295401659278843e-06, "loss": 0.3007, "step": 3814 }, { "epoch": 2.46, "learning_rate": 1.6257258889834993e-06, "loss": 0.3022, "step": 3815 }, { "epoch": 2.46, "learning_rate": 1.6219156863122121e-06, "loss": 0.293, "step": 3816 }, { "epoch": 2.46, "learning_rate": 1.6181095597677732e-06, "loss": 0.3117, "step": 3817 }, { "epoch": 2.47, "learning_rate": 1.6143075112019492e-06, "loss": 0.2846, "step": 3818 }, { "epoch": 2.47, "learning_rate": 1.61050954246452e-06, "loss": 0.2985, "step": 3819 }, { "epoch": 2.47, "learning_rate": 1.6067156554032893e-06, "loss": 0.3033, "step": 3820 }, { "epoch": 2.47, "learning_rate": 1.60292585186406e-06, "loss": 0.2976, "step": 3821 }, { "epoch": 2.47, "learning_rate": 1.5991401336906642e-06, "loss": 0.3006, "step": 3822 }, { "epoch": 2.47, "learning_rate": 1.5953585027249364e-06, "loss": 0.2803, "step": 3823 }, { "epoch": 2.47, "learning_rate": 1.5915809608067245e-06, "loss": 0.2917, "step": 3824 }, { "epoch": 2.47, "learning_rate": 1.5878075097738898e-06, "loss": 0.281, "step": 3825 }, { "epoch": 2.47, "learning_rate": 1.5840381514622994e-06, "loss": 0.3103, "step": 3826 }, { "epoch": 2.47, "learning_rate": 1.5802728877058314e-06, "loss": 0.3078, "step": 3827 }, { "epoch": 2.47, "learning_rate": 1.5765117203363723e-06, "loss": 0.2928, "step": 3828 }, { "epoch": 2.47, "learning_rate": 1.5727546511838133e-06, "loss": 0.3041, "step": 3829 }, { "epoch": 2.47, "learning_rate": 1.5690016820760556e-06, "loss": 0.2938, "step": 3830 }, { "epoch": 2.47, "learning_rate": 1.5652528148389988e-06, "loss": 0.2919, "step": 3831 }, { "epoch": 2.47, "learning_rate": 1.5615080512965563e-06, "loss": 0.3032, "step": 3832 }, { "epoch": 2.47, "learning_rate": 1.5577673932706384e-06, "loss": 0.2944, "step": 3833 }, { "epoch": 2.48, "learning_rate": 1.554030842581159e-06, "loss": 0.2946, "step": 3834 }, { "epoch": 2.48, "learning_rate": 1.5502984010460354e-06, "loss": 0.2851, "step": 3835 }, { "epoch": 2.48, "learning_rate": 1.5465700704811825e-06, "loss": 0.3123, "step": 3836 }, { "epoch": 2.48, "learning_rate": 1.5428458527005187e-06, "loss": 0.2823, "step": 3837 }, { "epoch": 2.48, "learning_rate": 1.5391257495159584e-06, "loss": 0.2852, "step": 3838 }, { "epoch": 2.48, "learning_rate": 1.5354097627374176e-06, "loss": 0.3168, "step": 3839 }, { "epoch": 2.48, "learning_rate": 1.5316978941728045e-06, "loss": 0.3072, "step": 3840 }, { "epoch": 2.48, "learning_rate": 1.5279901456280321e-06, "loss": 0.2987, "step": 3841 }, { "epoch": 2.48, "learning_rate": 1.5242865189069978e-06, "loss": 0.2695, "step": 3842 }, { "epoch": 2.48, "learning_rate": 1.5205870158116055e-06, "loss": 0.2867, "step": 3843 }, { "epoch": 2.48, "learning_rate": 1.5168916381417387e-06, "loss": 0.3028, "step": 3844 }, { "epoch": 2.48, "learning_rate": 1.5132003876952895e-06, "loss": 0.2965, "step": 3845 }, { "epoch": 2.48, "learning_rate": 1.5095132662681323e-06, "loss": 0.2987, "step": 3846 }, { "epoch": 2.48, "learning_rate": 1.5058302756541331e-06, "loss": 0.2786, "step": 3847 }, { "epoch": 2.48, "learning_rate": 1.5021514176451514e-06, "loss": 0.2956, "step": 3848 }, { "epoch": 2.49, "learning_rate": 1.4984766940310314e-06, "loss": 0.3114, "step": 3849 }, { "epoch": 2.49, "learning_rate": 1.4948061065996167e-06, "loss": 0.3373, "step": 3850 }, { "epoch": 2.49, "learning_rate": 1.4911396571367208e-06, "loss": 0.2798, "step": 3851 }, { "epoch": 2.49, "learning_rate": 1.487477347426164e-06, "loss": 0.2996, "step": 3852 }, { "epoch": 2.49, "learning_rate": 1.4838191792497315e-06, "loss": 0.2919, "step": 3853 }, { "epoch": 2.49, "learning_rate": 1.4801651543872165e-06, "loss": 0.3009, "step": 3854 }, { "epoch": 2.49, "learning_rate": 1.4765152746163724e-06, "loss": 0.3077, "step": 3855 }, { "epoch": 2.49, "learning_rate": 1.4728695417129579e-06, "loss": 0.3045, "step": 3856 }, { "epoch": 2.49, "learning_rate": 1.4692279574506996e-06, "loss": 0.3154, "step": 3857 }, { "epoch": 2.49, "learning_rate": 1.4655905236013079e-06, "loss": 0.2937, "step": 3858 }, { "epoch": 2.49, "learning_rate": 1.4619572419344852e-06, "loss": 0.2907, "step": 3859 }, { "epoch": 2.49, "learning_rate": 1.458328114217894e-06, "loss": 0.2988, "step": 3860 }, { "epoch": 2.49, "learning_rate": 1.4547031422171977e-06, "loss": 0.2934, "step": 3861 }, { "epoch": 2.49, "learning_rate": 1.4510823276960151e-06, "loss": 0.286, "step": 3862 }, { "epoch": 2.49, "learning_rate": 1.4474656724159653e-06, "loss": 0.2892, "step": 3863 }, { "epoch": 2.49, "learning_rate": 1.4438531781366226e-06, "loss": 0.2984, "step": 3864 }, { "epoch": 2.5, "learning_rate": 1.440244846615554e-06, "loss": 0.3067, "step": 3865 }, { "epoch": 2.5, "learning_rate": 1.436640679608292e-06, "loss": 0.2776, "step": 3866 }, { "epoch": 2.5, "learning_rate": 1.4330406788683448e-06, "loss": 0.312, "step": 3867 }, { "epoch": 2.5, "learning_rate": 1.4294448461471943e-06, "loss": 0.306, "step": 3868 }, { "epoch": 2.5, "learning_rate": 1.4258531831942912e-06, "loss": 0.3034, "step": 3869 }, { "epoch": 2.5, "learning_rate": 1.4222656917570675e-06, "loss": 0.3068, "step": 3870 }, { "epoch": 2.5, "learning_rate": 1.4186823735809096e-06, "loss": 0.2983, "step": 3871 }, { "epoch": 2.5, "learning_rate": 1.4151032304091928e-06, "loss": 0.2906, "step": 3872 }, { "epoch": 2.5, "learning_rate": 1.4115282639832395e-06, "loss": 0.3213, "step": 3873 }, { "epoch": 2.5, "learning_rate": 1.407957476042361e-06, "loss": 0.2943, "step": 3874 }, { "epoch": 2.5, "learning_rate": 1.4043908683238217e-06, "loss": 0.2885, "step": 3875 }, { "epoch": 2.5, "learning_rate": 1.400828442562857e-06, "loss": 0.2889, "step": 3876 }, { "epoch": 2.5, "learning_rate": 1.3972702004926675e-06, "loss": 0.3024, "step": 3877 }, { "epoch": 2.5, "learning_rate": 1.3937161438444191e-06, "loss": 0.3004, "step": 3878 }, { "epoch": 2.5, "learning_rate": 1.390166274347239e-06, "loss": 0.3081, "step": 3879 }, { "epoch": 2.51, "learning_rate": 1.3866205937282195e-06, "loss": 0.3008, "step": 3880 }, { "epoch": 2.51, "learning_rate": 1.3830791037124147e-06, "loss": 0.3019, "step": 3881 }, { "epoch": 2.51, "learning_rate": 1.3795418060228361e-06, "loss": 0.2926, "step": 3882 }, { "epoch": 2.51, "learning_rate": 1.3760087023804635e-06, "loss": 0.3123, "step": 3883 }, { "epoch": 2.51, "learning_rate": 1.372479794504229e-06, "loss": 0.3043, "step": 3884 }, { "epoch": 2.51, "learning_rate": 1.3689550841110255e-06, "loss": 0.3245, "step": 3885 }, { "epoch": 2.51, "learning_rate": 1.3654345729157059e-06, "loss": 0.2866, "step": 3886 }, { "epoch": 2.51, "learning_rate": 1.3619182626310768e-06, "loss": 0.2944, "step": 3887 }, { "epoch": 2.51, "learning_rate": 1.3584061549679017e-06, "loss": 0.2812, "step": 3888 }, { "epoch": 2.51, "learning_rate": 1.3548982516349019e-06, "loss": 0.291, "step": 3889 }, { "epoch": 2.51, "learning_rate": 1.3513945543387497e-06, "loss": 0.3025, "step": 3890 }, { "epoch": 2.51, "learning_rate": 1.347895064784075e-06, "loss": 0.2911, "step": 3891 }, { "epoch": 2.51, "learning_rate": 1.3443997846734535e-06, "loss": 0.3198, "step": 3892 }, { "epoch": 2.51, "learning_rate": 1.3409087157074264e-06, "loss": 0.306, "step": 3893 }, { "epoch": 2.51, "learning_rate": 1.3374218595844668e-06, "loss": 0.324, "step": 3894 }, { "epoch": 2.51, "learning_rate": 1.333939218001018e-06, "loss": 0.271, "step": 3895 }, { "epoch": 2.52, "learning_rate": 1.330460792651459e-06, "loss": 0.2816, "step": 3896 }, { "epoch": 2.52, "learning_rate": 1.3269865852281228e-06, "loss": 0.3161, "step": 3897 }, { "epoch": 2.52, "learning_rate": 1.3235165974212905e-06, "loss": 0.3027, "step": 3898 }, { "epoch": 2.52, "learning_rate": 1.3200508309191883e-06, "loss": 0.3172, "step": 3899 }, { "epoch": 2.52, "learning_rate": 1.31658928740799e-06, "loss": 0.3101, "step": 3900 }, { "epoch": 2.52, "learning_rate": 1.3131319685718126e-06, "loss": 0.3067, "step": 3901 }, { "epoch": 2.52, "learning_rate": 1.309678876092727e-06, "loss": 0.3112, "step": 3902 }, { "epoch": 2.52, "learning_rate": 1.3062300116507299e-06, "loss": 0.3027, "step": 3903 }, { "epoch": 2.52, "learning_rate": 1.3027853769237808e-06, "loss": 0.297, "step": 3904 }, { "epoch": 2.52, "learning_rate": 1.299344973587765e-06, "loss": 0.2983, "step": 3905 }, { "epoch": 2.52, "learning_rate": 1.2959088033165212e-06, "loss": 0.2974, "step": 3906 }, { "epoch": 2.52, "learning_rate": 1.2924768677818222e-06, "loss": 0.3029, "step": 3907 }, { "epoch": 2.52, "learning_rate": 1.2890491686533812e-06, "loss": 0.291, "step": 3908 }, { "epoch": 2.52, "learning_rate": 1.285625707598852e-06, "loss": 0.2921, "step": 3909 }, { "epoch": 2.52, "learning_rate": 1.2822064862838236e-06, "loss": 0.2951, "step": 3910 }, { "epoch": 2.53, "learning_rate": 1.2787915063718293e-06, "loss": 0.3039, "step": 3911 }, { "epoch": 2.53, "learning_rate": 1.2753807695243258e-06, "loss": 0.3088, "step": 3912 }, { "epoch": 2.53, "learning_rate": 1.2719742774007237e-06, "loss": 0.3066, "step": 3913 }, { "epoch": 2.53, "learning_rate": 1.2685720316583471e-06, "loss": 0.2845, "step": 3914 }, { "epoch": 2.53, "learning_rate": 1.2651740339524753e-06, "loss": 0.3141, "step": 3915 }, { "epoch": 2.53, "learning_rate": 1.2617802859363016e-06, "loss": 0.2984, "step": 3916 }, { "epoch": 2.53, "learning_rate": 1.2583907892609681e-06, "loss": 0.2982, "step": 3917 }, { "epoch": 2.53, "learning_rate": 1.2550055455755383e-06, "loss": 0.3091, "step": 3918 }, { "epoch": 2.53, "learning_rate": 1.251624556527008e-06, "loss": 0.3162, "step": 3919 }, { "epoch": 2.53, "learning_rate": 1.2482478237603102e-06, "loss": 0.2893, "step": 3920 }, { "epoch": 2.53, "learning_rate": 1.244875348918293e-06, "loss": 0.3112, "step": 3921 }, { "epoch": 2.53, "learning_rate": 1.2415071336417516e-06, "loss": 0.3008, "step": 3922 }, { "epoch": 2.53, "learning_rate": 1.2381431795693888e-06, "loss": 0.3149, "step": 3923 }, { "epoch": 2.53, "learning_rate": 1.2347834883378518e-06, "loss": 0.2944, "step": 3924 }, { "epoch": 2.53, "learning_rate": 1.2314280615816999e-06, "loss": 0.299, "step": 3925 }, { "epoch": 2.53, "learning_rate": 1.2280769009334282e-06, "loss": 0.2788, "step": 3926 }, { "epoch": 2.54, "learning_rate": 1.2247300080234502e-06, "loss": 0.3081, "step": 3927 }, { "epoch": 2.54, "learning_rate": 1.2213873844801049e-06, "loss": 0.2935, "step": 3928 }, { "epoch": 2.54, "learning_rate": 1.2180490319296546e-06, "loss": 0.2986, "step": 3929 }, { "epoch": 2.54, "learning_rate": 1.214714951996282e-06, "loss": 0.3046, "step": 3930 }, { "epoch": 2.54, "learning_rate": 1.211385146302093e-06, "loss": 0.2607, "step": 3931 }, { "epoch": 2.54, "learning_rate": 1.2080596164671098e-06, "loss": 0.294, "step": 3932 }, { "epoch": 2.54, "learning_rate": 1.2047383641092858e-06, "loss": 0.2904, "step": 3933 }, { "epoch": 2.54, "learning_rate": 1.2014213908444761e-06, "loss": 0.2941, "step": 3934 }, { "epoch": 2.54, "learning_rate": 1.1981086982864676e-06, "loss": 0.2854, "step": 3935 }, { "epoch": 2.54, "learning_rate": 1.1948002880469601e-06, "loss": 0.2973, "step": 3936 }, { "epoch": 2.54, "learning_rate": 1.191496161735568e-06, "loss": 0.317, "step": 3937 }, { "epoch": 2.54, "learning_rate": 1.1881963209598246e-06, "loss": 0.2899, "step": 3938 }, { "epoch": 2.54, "learning_rate": 1.1849007673251755e-06, "loss": 0.2886, "step": 3939 }, { "epoch": 2.54, "learning_rate": 1.1816095024349828e-06, "loss": 0.3111, "step": 3940 }, { "epoch": 2.54, "learning_rate": 1.1783225278905208e-06, "loss": 0.3101, "step": 3941 }, { "epoch": 2.55, "learning_rate": 1.1750398452909761e-06, "loss": 0.2972, "step": 3942 }, { "epoch": 2.55, "learning_rate": 1.171761456233449e-06, "loss": 0.3035, "step": 3943 }, { "epoch": 2.55, "learning_rate": 1.1684873623129457e-06, "loss": 0.2968, "step": 3944 }, { "epoch": 2.55, "learning_rate": 1.1652175651223928e-06, "loss": 0.3104, "step": 3945 }, { "epoch": 2.55, "learning_rate": 1.1619520662526174e-06, "loss": 0.3097, "step": 3946 }, { "epoch": 2.55, "learning_rate": 1.1586908672923581e-06, "loss": 0.293, "step": 3947 }, { "epoch": 2.55, "learning_rate": 1.1554339698282623e-06, "loss": 0.2962, "step": 3948 }, { "epoch": 2.55, "learning_rate": 1.152181375444884e-06, "loss": 0.3006, "step": 3949 }, { "epoch": 2.55, "learning_rate": 1.1489330857246828e-06, "loss": 0.2848, "step": 3950 }, { "epoch": 2.55, "learning_rate": 1.1456891022480254e-06, "loss": 0.2981, "step": 3951 }, { "epoch": 2.55, "learning_rate": 1.1424494265931829e-06, "loss": 0.32, "step": 3952 }, { "epoch": 2.55, "learning_rate": 1.1392140603363288e-06, "loss": 0.2883, "step": 3953 }, { "epoch": 2.55, "learning_rate": 1.1359830050515475e-06, "loss": 0.3112, "step": 3954 }, { "epoch": 2.55, "learning_rate": 1.1327562623108123e-06, "loss": 0.3197, "step": 3955 }, { "epoch": 2.55, "learning_rate": 1.1295338336840113e-06, "loss": 0.3165, "step": 3956 }, { "epoch": 2.55, "learning_rate": 1.1263157207389274e-06, "loss": 0.2945, "step": 3957 }, { "epoch": 2.56, "learning_rate": 1.1231019250412467e-06, "loss": 0.309, "step": 3958 }, { "epoch": 2.56, "learning_rate": 1.1198924481545503e-06, "loss": 0.2888, "step": 3959 }, { "epoch": 2.56, "learning_rate": 1.1166872916403226e-06, "loss": 0.3085, "step": 3960 }, { "epoch": 2.56, "learning_rate": 1.113486457057944e-06, "loss": 0.3201, "step": 3961 }, { "epoch": 2.56, "learning_rate": 1.110289945964692e-06, "loss": 0.304, "step": 3962 }, { "epoch": 2.56, "learning_rate": 1.107097759915745e-06, "loss": 0.3065, "step": 3963 }, { "epoch": 2.56, "learning_rate": 1.1039099004641684e-06, "loss": 0.2948, "step": 3964 }, { "epoch": 2.56, "learning_rate": 1.1007263691609326e-06, "loss": 0.3059, "step": 3965 }, { "epoch": 2.56, "learning_rate": 1.097547167554891e-06, "loss": 0.2931, "step": 3966 }, { "epoch": 2.56, "learning_rate": 1.0943722971928027e-06, "loss": 0.3118, "step": 3967 }, { "epoch": 2.56, "learning_rate": 1.0912017596193115e-06, "loss": 0.2951, "step": 3968 }, { "epoch": 2.56, "learning_rate": 1.0880355563769562e-06, "loss": 0.2942, "step": 3969 }, { "epoch": 2.56, "learning_rate": 1.084873689006164e-06, "loss": 0.3099, "step": 3970 }, { "epoch": 2.56, "learning_rate": 1.081716159045255e-06, "loss": 0.2983, "step": 3971 }, { "epoch": 2.56, "learning_rate": 1.0785629680304433e-06, "loss": 0.2976, "step": 3972 }, { "epoch": 2.57, "learning_rate": 1.0754141174958188e-06, "loss": 0.283, "step": 3973 }, { "epoch": 2.57, "learning_rate": 1.0722696089733787e-06, "loss": 0.2871, "step": 3974 }, { "epoch": 2.57, "learning_rate": 1.0691294439929867e-06, "loss": 0.3118, "step": 3975 }, { "epoch": 2.57, "learning_rate": 1.065993624082411e-06, "loss": 0.3044, "step": 3976 }, { "epoch": 2.57, "learning_rate": 1.0628621507672964e-06, "loss": 0.3143, "step": 3977 }, { "epoch": 2.57, "learning_rate": 1.0597350255711758e-06, "loss": 0.3061, "step": 3978 }, { "epoch": 2.57, "learning_rate": 1.0566122500154653e-06, "loss": 0.3005, "step": 3979 }, { "epoch": 2.57, "learning_rate": 1.053493825619467e-06, "loss": 0.2914, "step": 3980 }, { "epoch": 2.57, "learning_rate": 1.0503797539003624e-06, "loss": 0.3056, "step": 3981 }, { "epoch": 2.57, "learning_rate": 1.047270036373217e-06, "loss": 0.293, "step": 3982 }, { "epoch": 2.57, "learning_rate": 1.0441646745509848e-06, "loss": 0.3175, "step": 3983 }, { "epoch": 2.57, "learning_rate": 1.0410636699444855e-06, "loss": 0.2863, "step": 3984 }, { "epoch": 2.57, "learning_rate": 1.0379670240624361e-06, "loss": 0.2818, "step": 3985 }, { "epoch": 2.57, "learning_rate": 1.0348747384114167e-06, "loss": 0.2979, "step": 3986 }, { "epoch": 2.57, "learning_rate": 1.0317868144958987e-06, "loss": 0.3005, "step": 3987 }, { "epoch": 2.57, "learning_rate": 1.0287032538182262e-06, "loss": 0.3014, "step": 3988 }, { "epoch": 2.58, "learning_rate": 1.0256240578786214e-06, "loss": 0.3046, "step": 3989 }, { "epoch": 2.58, "learning_rate": 1.0225492281751815e-06, "loss": 0.3131, "step": 3990 }, { "epoch": 2.58, "learning_rate": 1.0194787662038796e-06, "loss": 0.2822, "step": 3991 }, { "epoch": 2.58, "learning_rate": 1.0164126734585667e-06, "loss": 0.3094, "step": 3992 }, { "epoch": 2.58, "learning_rate": 1.0133509514309648e-06, "loss": 0.295, "step": 3993 }, { "epoch": 2.58, "learning_rate": 1.0102936016106702e-06, "loss": 0.3178, "step": 3994 }, { "epoch": 2.58, "learning_rate": 1.0072406254851518e-06, "loss": 0.3174, "step": 3995 }, { "epoch": 2.58, "learning_rate": 1.0041920245397552e-06, "loss": 0.2968, "step": 3996 }, { "epoch": 2.58, "learning_rate": 1.0011478002576903e-06, "loss": 0.2992, "step": 3997 }, { "epoch": 2.58, "learning_rate": 9.981079541200412e-07, "loss": 0.3044, "step": 3998 }, { "epoch": 2.58, "learning_rate": 9.950724876057616e-07, "loss": 0.2981, "step": 3999 }, { "epoch": 2.58, "learning_rate": 9.920414021916747e-07, "loss": 0.305, "step": 4000 }, { "epoch": 2.58, "learning_rate": 9.89014699352473e-07, "loss": 0.2892, "step": 4001 }, { "epoch": 2.58, "learning_rate": 9.85992380560713e-07, "loss": 0.3047, "step": 4002 }, { "epoch": 2.58, "learning_rate": 9.82974447286823e-07, "loss": 0.2815, "step": 4003 }, { "epoch": 2.59, "learning_rate": 9.79960900999094e-07, "loss": 0.2947, "step": 4004 }, { "epoch": 2.59, "learning_rate": 9.769517431636843e-07, "loss": 0.2919, "step": 4005 }, { "epoch": 2.59, "learning_rate": 9.739469752446197e-07, "loss": 0.3058, "step": 4006 }, { "epoch": 2.59, "learning_rate": 9.709465987037848e-07, "loss": 0.3013, "step": 4007 }, { "epoch": 2.59, "learning_rate": 9.67950615000931e-07, "loss": 0.3173, "step": 4008 }, { "epoch": 2.59, "learning_rate": 9.649590255936735e-07, "loss": 0.319, "step": 4009 }, { "epoch": 2.59, "learning_rate": 9.61971831937485e-07, "loss": 0.2952, "step": 4010 }, { "epoch": 2.59, "learning_rate": 9.589890354857056e-07, "loss": 0.3228, "step": 4011 }, { "epoch": 2.59, "learning_rate": 9.560106376895305e-07, "loss": 0.2828, "step": 4012 }, { "epoch": 2.59, "learning_rate": 9.530366399980184e-07, "loss": 0.2918, "step": 4013 }, { "epoch": 2.59, "learning_rate": 9.50067043858085e-07, "loss": 0.3166, "step": 4014 }, { "epoch": 2.59, "learning_rate": 9.471018507145102e-07, "loss": 0.2923, "step": 4015 }, { "epoch": 2.59, "learning_rate": 9.441410620099201e-07, "loss": 0.285, "step": 4016 }, { "epoch": 2.59, "learning_rate": 9.411846791848123e-07, "loss": 0.2977, "step": 4017 }, { "epoch": 2.59, "learning_rate": 9.382327036775274e-07, "loss": 0.2942, "step": 4018 }, { "epoch": 2.59, "learning_rate": 9.352851369242733e-07, "loss": 0.2861, "step": 4019 }, { "epoch": 2.6, "learning_rate": 9.32341980359105e-07, "loss": 0.2898, "step": 4020 }, { "epoch": 2.6, "learning_rate": 9.294032354139349e-07, "loss": 0.3173, "step": 4021 }, { "epoch": 2.6, "learning_rate": 9.264689035185282e-07, "loss": 0.3086, "step": 4022 }, { "epoch": 2.6, "learning_rate": 9.235389861005029e-07, "loss": 0.2991, "step": 4023 }, { "epoch": 2.6, "learning_rate": 9.206134845853343e-07, "loss": 0.2997, "step": 4024 }, { "epoch": 2.6, "learning_rate": 9.176924003963383e-07, "loss": 0.3159, "step": 4025 }, { "epoch": 2.6, "learning_rate": 9.147757349546948e-07, "loss": 0.3065, "step": 4026 }, { "epoch": 2.6, "learning_rate": 9.118634896794199e-07, "loss": 0.3105, "step": 4027 }, { "epoch": 2.6, "learning_rate": 9.089556659873921e-07, "loss": 0.314, "step": 4028 }, { "epoch": 2.6, "learning_rate": 9.060522652933313e-07, "loss": 0.2968, "step": 4029 }, { "epoch": 2.6, "learning_rate": 9.031532890098083e-07, "loss": 0.3014, "step": 4030 }, { "epoch": 2.6, "learning_rate": 9.002587385472394e-07, "loss": 0.2786, "step": 4031 }, { "epoch": 2.6, "learning_rate": 8.973686153138872e-07, "loss": 0.3099, "step": 4032 }, { "epoch": 2.6, "learning_rate": 8.944829207158678e-07, "loss": 0.3075, "step": 4033 }, { "epoch": 2.6, "learning_rate": 8.916016561571284e-07, "loss": 0.2647, "step": 4034 }, { "epoch": 2.61, "learning_rate": 8.887248230394763e-07, "loss": 0.2944, "step": 4035 }, { "epoch": 2.61, "learning_rate": 8.858524227625498e-07, "loss": 0.2932, "step": 4036 }, { "epoch": 2.61, "learning_rate": 8.829844567238399e-07, "loss": 0.3183, "step": 4037 }, { "epoch": 2.61, "learning_rate": 8.801209263186761e-07, "loss": 0.2962, "step": 4038 }, { "epoch": 2.61, "learning_rate": 8.772618329402305e-07, "loss": 0.3081, "step": 4039 }, { "epoch": 2.61, "learning_rate": 8.744071779795171e-07, "loss": 0.2975, "step": 4040 }, { "epoch": 2.61, "learning_rate": 8.715569628253884e-07, "loss": 0.2952, "step": 4041 }, { "epoch": 2.61, "learning_rate": 8.687111888645383e-07, "loss": 0.2823, "step": 4042 }, { "epoch": 2.61, "learning_rate": 8.658698574815017e-07, "loss": 0.3048, "step": 4043 }, { "epoch": 2.61, "learning_rate": 8.630329700586481e-07, "loss": 0.3137, "step": 4044 }, { "epoch": 2.61, "learning_rate": 8.602005279761871e-07, "loss": 0.3, "step": 4045 }, { "epoch": 2.61, "learning_rate": 8.573725326121696e-07, "loss": 0.29, "step": 4046 }, { "epoch": 2.61, "learning_rate": 8.545489853424749e-07, "loss": 0.2897, "step": 4047 }, { "epoch": 2.61, "learning_rate": 8.517298875408253e-07, "loss": 0.3089, "step": 4048 }, { "epoch": 2.61, "learning_rate": 8.489152405787726e-07, "loss": 0.3168, "step": 4049 }, { "epoch": 2.61, "learning_rate": 8.461050458257081e-07, "loss": 0.3058, "step": 4050 }, { "epoch": 2.62, "learning_rate": 8.432993046488536e-07, "loss": 0.3092, "step": 4051 }, { "epoch": 2.62, "learning_rate": 8.40498018413266e-07, "loss": 0.288, "step": 4052 }, { "epoch": 2.62, "learning_rate": 8.377011884818353e-07, "loss": 0.2999, "step": 4053 }, { "epoch": 2.62, "learning_rate": 8.349088162152796e-07, "loss": 0.2884, "step": 4054 }, { "epoch": 2.62, "learning_rate": 8.321209029721533e-07, "loss": 0.3224, "step": 4055 }, { "epoch": 2.62, "learning_rate": 8.293374501088358e-07, "loss": 0.3146, "step": 4056 }, { "epoch": 2.62, "learning_rate": 8.26558458979545e-07, "loss": 0.2939, "step": 4057 }, { "epoch": 2.62, "learning_rate": 8.237839309363183e-07, "loss": 0.2919, "step": 4058 }, { "epoch": 2.62, "learning_rate": 8.210138673290291e-07, "loss": 0.2943, "step": 4059 }, { "epoch": 2.62, "learning_rate": 8.182482695053728e-07, "loss": 0.2956, "step": 4060 }, { "epoch": 2.62, "learning_rate": 8.154871388108776e-07, "loss": 0.3211, "step": 4061 }, { "epoch": 2.62, "learning_rate": 8.127304765888943e-07, "loss": 0.3116, "step": 4062 }, { "epoch": 2.62, "learning_rate": 8.099782841806014e-07, "loss": 0.2995, "step": 4063 }, { "epoch": 2.62, "learning_rate": 8.072305629250033e-07, "loss": 0.297, "step": 4064 }, { "epoch": 2.62, "learning_rate": 8.044873141589282e-07, "loss": 0.2937, "step": 4065 }, { "epoch": 2.63, "learning_rate": 8.017485392170255e-07, "loss": 0.3164, "step": 4066 }, { "epoch": 2.63, "learning_rate": 7.990142394317779e-07, "loss": 0.3092, "step": 4067 }, { "epoch": 2.63, "learning_rate": 7.962844161334748e-07, "loss": 0.2951, "step": 4068 }, { "epoch": 2.63, "learning_rate": 7.935590706502439e-07, "loss": 0.2931, "step": 4069 }, { "epoch": 2.63, "learning_rate": 7.908382043080253e-07, "loss": 0.2992, "step": 4070 }, { "epoch": 2.63, "learning_rate": 7.881218184305806e-07, "loss": 0.2924, "step": 4071 }, { "epoch": 2.63, "learning_rate": 7.854099143394933e-07, "loss": 0.2998, "step": 4072 }, { "epoch": 2.63, "learning_rate": 7.82702493354166e-07, "loss": 0.2941, "step": 4073 }, { "epoch": 2.63, "learning_rate": 7.799995567918206e-07, "loss": 0.3017, "step": 4074 }, { "epoch": 2.63, "learning_rate": 7.77301105967494e-07, "loss": 0.3158, "step": 4075 }, { "epoch": 2.63, "learning_rate": 7.746071421940482e-07, "loss": 0.3152, "step": 4076 }, { "epoch": 2.63, "learning_rate": 7.719176667821515e-07, "loss": 0.2991, "step": 4077 }, { "epoch": 2.63, "learning_rate": 7.692326810403017e-07, "loss": 0.3016, "step": 4078 }, { "epoch": 2.63, "learning_rate": 7.66552186274796e-07, "loss": 0.3027, "step": 4079 }, { "epoch": 2.63, "learning_rate": 7.638761837897612e-07, "loss": 0.3065, "step": 4080 }, { "epoch": 2.63, "learning_rate": 7.612046748871327e-07, "loss": 0.293, "step": 4081 }, { "epoch": 2.64, "learning_rate": 7.585376608666583e-07, "loss": 0.2876, "step": 4082 }, { "epoch": 2.64, "learning_rate": 7.558751430259015e-07, "loss": 0.3001, "step": 4083 }, { "epoch": 2.64, "learning_rate": 7.532171226602336e-07, "loss": 0.2948, "step": 4084 }, { "epoch": 2.64, "learning_rate": 7.505636010628492e-07, "loss": 0.3149, "step": 4085 }, { "epoch": 2.64, "learning_rate": 7.479145795247367e-07, "loss": 0.3016, "step": 4086 }, { "epoch": 2.64, "learning_rate": 7.45270059334714e-07, "loss": 0.3205, "step": 4087 }, { "epoch": 2.64, "learning_rate": 7.426300417793919e-07, "loss": 0.3044, "step": 4088 }, { "epoch": 2.64, "learning_rate": 7.399945281432041e-07, "loss": 0.2862, "step": 4089 }, { "epoch": 2.64, "learning_rate": 7.37363519708384e-07, "loss": 0.2969, "step": 4090 }, { "epoch": 2.64, "learning_rate": 7.347370177549784e-07, "loss": 0.2996, "step": 4091 }, { "epoch": 2.64, "learning_rate": 7.321150235608398e-07, "loss": 0.3008, "step": 4092 }, { "epoch": 2.64, "learning_rate": 7.294975384016257e-07, "loss": 0.2883, "step": 4093 }, { "epoch": 2.64, "learning_rate": 7.268845635508037e-07, "loss": 0.277, "step": 4094 }, { "epoch": 2.64, "learning_rate": 7.242761002796405e-07, "loss": 0.3111, "step": 4095 }, { "epoch": 2.64, "learning_rate": 7.21672149857221e-07, "loss": 0.2861, "step": 4096 }, { "epoch": 2.65, "learning_rate": 7.190727135504161e-07, "loss": 0.3161, "step": 4097 }, { "epoch": 2.65, "learning_rate": 7.164777926239164e-07, "loss": 0.3088, "step": 4098 }, { "epoch": 2.65, "learning_rate": 7.138873883402064e-07, "loss": 0.3102, "step": 4099 }, { "epoch": 2.65, "learning_rate": 7.113015019595793e-07, "loss": 0.3092, "step": 4100 }, { "epoch": 2.65, "learning_rate": 7.08720134740123e-07, "loss": 0.2915, "step": 4101 }, { "epoch": 2.65, "learning_rate": 7.061432879377339e-07, "loss": 0.3189, "step": 4102 }, { "epoch": 2.65, "learning_rate": 7.03570962806106e-07, "loss": 0.2863, "step": 4103 }, { "epoch": 2.65, "learning_rate": 7.010031605967316e-07, "loss": 0.311, "step": 4104 }, { "epoch": 2.65, "learning_rate": 6.984398825589045e-07, "loss": 0.2959, "step": 4105 }, { "epoch": 2.65, "learning_rate": 6.958811299397194e-07, "loss": 0.2861, "step": 4106 }, { "epoch": 2.65, "learning_rate": 6.933269039840628e-07, "loss": 0.2923, "step": 4107 }, { "epoch": 2.65, "learning_rate": 6.907772059346285e-07, "loss": 0.3041, "step": 4108 }, { "epoch": 2.65, "learning_rate": 6.882320370319006e-07, "loss": 0.2957, "step": 4109 }, { "epoch": 2.65, "learning_rate": 6.856913985141589e-07, "loss": 0.2973, "step": 4110 }, { "epoch": 2.65, "learning_rate": 6.831552916174833e-07, "loss": 0.3011, "step": 4111 }, { "epoch": 2.65, "learning_rate": 6.806237175757457e-07, "loss": 0.2928, "step": 4112 }, { "epoch": 2.66, "learning_rate": 6.780966776206144e-07, "loss": 0.3014, "step": 4113 }, { "epoch": 2.66, "learning_rate": 6.755741729815524e-07, "loss": 0.3107, "step": 4114 }, { "epoch": 2.66, "learning_rate": 6.730562048858136e-07, "loss": 0.3001, "step": 4115 }, { "epoch": 2.66, "learning_rate": 6.705427745584469e-07, "loss": 0.2944, "step": 4116 }, { "epoch": 2.66, "learning_rate": 6.680338832222921e-07, "loss": 0.2896, "step": 4117 }, { "epoch": 2.66, "learning_rate": 6.655295320979804e-07, "loss": 0.2965, "step": 4118 }, { "epoch": 2.66, "learning_rate": 6.630297224039383e-07, "loss": 0.2982, "step": 4119 }, { "epoch": 2.66, "learning_rate": 6.605344553563775e-07, "loss": 0.299, "step": 4120 }, { "epoch": 2.66, "learning_rate": 6.580437321693023e-07, "loss": 0.2919, "step": 4121 }, { "epoch": 2.66, "learning_rate": 6.555575540545056e-07, "loss": 0.2973, "step": 4122 }, { "epoch": 2.66, "learning_rate": 6.530759222215688e-07, "loss": 0.3041, "step": 4123 }, { "epoch": 2.66, "learning_rate": 6.505988378778616e-07, "loss": 0.2866, "step": 4124 }, { "epoch": 2.66, "learning_rate": 6.481263022285411e-07, "loss": 0.2795, "step": 4125 }, { "epoch": 2.66, "learning_rate": 6.456583164765518e-07, "loss": 0.2996, "step": 4126 }, { "epoch": 2.66, "learning_rate": 6.431948818226241e-07, "loss": 0.2957, "step": 4127 }, { "epoch": 2.67, "learning_rate": 6.407359994652773e-07, "loss": 0.3008, "step": 4128 }, { "epoch": 2.67, "learning_rate": 6.382816706008088e-07, "loss": 0.3084, "step": 4129 }, { "epoch": 2.67, "learning_rate": 6.35831896423309e-07, "loss": 0.3145, "step": 4130 }, { "epoch": 2.67, "learning_rate": 6.333866781246433e-07, "loss": 0.3053, "step": 4131 }, { "epoch": 2.67, "learning_rate": 6.309460168944692e-07, "loss": 0.307, "step": 4132 }, { "epoch": 2.67, "learning_rate": 6.285099139202233e-07, "loss": 0.3009, "step": 4133 }, { "epoch": 2.67, "learning_rate": 6.260783703871243e-07, "loss": 0.2898, "step": 4134 }, { "epoch": 2.67, "learning_rate": 6.236513874781724e-07, "loss": 0.2878, "step": 4135 }, { "epoch": 2.67, "learning_rate": 6.212289663741477e-07, "loss": 0.2886, "step": 4136 }, { "epoch": 2.67, "learning_rate": 6.188111082536174e-07, "loss": 0.2858, "step": 4137 }, { "epoch": 2.67, "learning_rate": 6.163978142929183e-07, "loss": 0.2858, "step": 4138 }, { "epoch": 2.67, "learning_rate": 6.139890856661778e-07, "loss": 0.2888, "step": 4139 }, { "epoch": 2.67, "learning_rate": 6.11584923545292e-07, "loss": 0.3072, "step": 4140 }, { "epoch": 2.67, "learning_rate": 6.091853290999417e-07, "loss": 0.2835, "step": 4141 }, { "epoch": 2.67, "learning_rate": 6.067903034975853e-07, "loss": 0.3075, "step": 4142 }, { "epoch": 2.67, "learning_rate": 6.043998479034551e-07, "loss": 0.3227, "step": 4143 }, { "epoch": 2.68, "learning_rate": 6.020139634805622e-07, "loss": 0.293, "step": 4144 }, { "epoch": 2.68, "learning_rate": 5.996326513896911e-07, "loss": 0.3, "step": 4145 }, { "epoch": 2.68, "learning_rate": 5.972559127894084e-07, "loss": 0.2958, "step": 4146 }, { "epoch": 2.68, "learning_rate": 5.948837488360448e-07, "loss": 0.3116, "step": 4147 }, { "epoch": 2.68, "learning_rate": 5.925161606837182e-07, "loss": 0.309, "step": 4148 }, { "epoch": 2.68, "learning_rate": 5.901531494843071e-07, "loss": 0.3146, "step": 4149 }, { "epoch": 2.68, "learning_rate": 5.877947163874753e-07, "loss": 0.3155, "step": 4150 }, { "epoch": 2.68, "learning_rate": 5.854408625406505e-07, "loss": 0.2894, "step": 4151 }, { "epoch": 2.68, "learning_rate": 5.830915890890366e-07, "loss": 0.3019, "step": 4152 }, { "epoch": 2.68, "learning_rate": 5.80746897175607e-07, "loss": 0.2973, "step": 4153 }, { "epoch": 2.68, "learning_rate": 5.784067879411093e-07, "loss": 0.2771, "step": 4154 }, { "epoch": 2.68, "learning_rate": 5.760712625240572e-07, "loss": 0.3048, "step": 4155 }, { "epoch": 2.68, "learning_rate": 5.737403220607374e-07, "loss": 0.2993, "step": 4156 }, { "epoch": 2.68, "learning_rate": 5.714139676852059e-07, "loss": 0.3033, "step": 4157 }, { "epoch": 2.68, "learning_rate": 5.690922005292831e-07, "loss": 0.3004, "step": 4158 }, { "epoch": 2.69, "learning_rate": 5.667750217225654e-07, "loss": 0.2954, "step": 4159 }, { "epoch": 2.69, "learning_rate": 5.644624323924108e-07, "loss": 0.297, "step": 4160 }, { "epoch": 2.69, "learning_rate": 5.621544336639451e-07, "loss": 0.2959, "step": 4161 }, { "epoch": 2.69, "learning_rate": 5.598510266600632e-07, "loss": 0.2962, "step": 4162 }, { "epoch": 2.69, "learning_rate": 5.575522125014243e-07, "loss": 0.2974, "step": 4163 }, { "epoch": 2.69, "learning_rate": 5.552579923064516e-07, "loss": 0.3026, "step": 4164 }, { "epoch": 2.69, "learning_rate": 5.529683671913377e-07, "loss": 0.2999, "step": 4165 }, { "epoch": 2.69, "learning_rate": 5.506833382700349e-07, "loss": 0.3099, "step": 4166 }, { "epoch": 2.69, "learning_rate": 5.484029066542618e-07, "loss": 0.3045, "step": 4167 }, { "epoch": 2.69, "learning_rate": 5.461270734534973e-07, "loss": 0.3063, "step": 4168 }, { "epoch": 2.69, "learning_rate": 5.438558397749905e-07, "loss": 0.2939, "step": 4169 }, { "epoch": 2.69, "learning_rate": 5.415892067237449e-07, "loss": 0.2968, "step": 4170 }, { "epoch": 2.69, "learning_rate": 5.393271754025287e-07, "loss": 0.2798, "step": 4171 }, { "epoch": 2.69, "learning_rate": 5.370697469118713e-07, "loss": 0.3074, "step": 4172 }, { "epoch": 2.69, "learning_rate": 5.348169223500621e-07, "loss": 0.2905, "step": 4173 }, { "epoch": 2.69, "learning_rate": 5.325687028131521e-07, "loss": 0.3169, "step": 4174 }, { "epoch": 2.7, "learning_rate": 5.303250893949508e-07, "loss": 0.2797, "step": 4175 }, { "epoch": 2.7, "learning_rate": 5.28086083187025e-07, "loss": 0.2929, "step": 4176 }, { "epoch": 2.7, "learning_rate": 5.25851685278701e-07, "loss": 0.3051, "step": 4177 }, { "epoch": 2.7, "learning_rate": 5.236218967570694e-07, "loss": 0.2948, "step": 4178 }, { "epoch": 2.7, "learning_rate": 5.213967187069657e-07, "loss": 0.3026, "step": 4179 }, { "epoch": 2.7, "learning_rate": 5.191761522109939e-07, "loss": 0.2938, "step": 4180 }, { "epoch": 2.7, "learning_rate": 5.169601983495065e-07, "loss": 0.2992, "step": 4181 }, { "epoch": 2.7, "learning_rate": 5.147488582006166e-07, "loss": 0.297, "step": 4182 }, { "epoch": 2.7, "learning_rate": 5.125421328401914e-07, "loss": 0.306, "step": 4183 }, { "epoch": 2.7, "learning_rate": 5.10340023341851e-07, "loss": 0.2947, "step": 4184 }, { "epoch": 2.7, "learning_rate": 5.081425307769739e-07, "loss": 0.3018, "step": 4185 }, { "epoch": 2.7, "learning_rate": 5.059496562146859e-07, "loss": 0.3036, "step": 4186 }, { "epoch": 2.7, "learning_rate": 5.037614007218739e-07, "loss": 0.3018, "step": 4187 }, { "epoch": 2.7, "learning_rate": 5.015777653631693e-07, "loss": 0.2913, "step": 4188 }, { "epoch": 2.7, "learning_rate": 4.993987512009657e-07, "loss": 0.3099, "step": 4189 }, { "epoch": 2.71, "learning_rate": 4.972243592953962e-07, "loss": 0.3096, "step": 4190 }, { "epoch": 2.71, "learning_rate": 4.950545907043569e-07, "loss": 0.2996, "step": 4191 }, { "epoch": 2.71, "learning_rate": 4.928894464834843e-07, "loss": 0.2978, "step": 4192 }, { "epoch": 2.71, "learning_rate": 4.907289276861738e-07, "loss": 0.3114, "step": 4193 }, { "epoch": 2.71, "learning_rate": 4.885730353635643e-07, "loss": 0.2939, "step": 4194 }, { "epoch": 2.71, "learning_rate": 4.864217705645468e-07, "loss": 0.2947, "step": 4195 }, { "epoch": 2.71, "learning_rate": 4.84275134335761e-07, "loss": 0.3041, "step": 4196 }, { "epoch": 2.71, "learning_rate": 4.821331277215901e-07, "loss": 0.3141, "step": 4197 }, { "epoch": 2.71, "learning_rate": 4.799957517641751e-07, "loss": 0.2986, "step": 4198 }, { "epoch": 2.71, "learning_rate": 4.778630075033908e-07, "loss": 0.3019, "step": 4199 }, { "epoch": 2.71, "learning_rate": 4.757348959768704e-07, "loss": 0.3013, "step": 4200 }, { "epoch": 2.71, "learning_rate": 4.736114182199836e-07, "loss": 0.3013, "step": 4201 }, { "epoch": 2.71, "learning_rate": 4.714925752658539e-07, "loss": 0.2895, "step": 4202 }, { "epoch": 2.71, "learning_rate": 4.6937836814534455e-07, "loss": 0.2963, "step": 4203 }, { "epoch": 2.71, "learning_rate": 4.67268797887066e-07, "loss": 0.2898, "step": 4204 }, { "epoch": 2.71, "learning_rate": 4.651638655173707e-07, "loss": 0.2984, "step": 4205 }, { "epoch": 2.72, "learning_rate": 4.6306357206035603e-07, "loss": 0.3055, "step": 4206 }, { "epoch": 2.72, "learning_rate": 4.6096791853786237e-07, "loss": 0.2976, "step": 4207 }, { "epoch": 2.72, "learning_rate": 4.5887690596946975e-07, "loss": 0.3033, "step": 4208 }, { "epoch": 2.72, "learning_rate": 4.567905353725088e-07, "loss": 0.2999, "step": 4209 }, { "epoch": 2.72, "learning_rate": 4.5470880776203985e-07, "loss": 0.305, "step": 4210 }, { "epoch": 2.72, "learning_rate": 4.526317241508738e-07, "loss": 0.2857, "step": 4211 }, { "epoch": 2.72, "learning_rate": 4.5055928554955665e-07, "loss": 0.304, "step": 4212 }, { "epoch": 2.72, "learning_rate": 4.484914929663786e-07, "loss": 0.3079, "step": 4213 }, { "epoch": 2.72, "learning_rate": 4.464283474073661e-07, "loss": 0.307, "step": 4214 }, { "epoch": 2.72, "learning_rate": 4.4436984987628495e-07, "loss": 0.3049, "step": 4215 }, { "epoch": 2.72, "learning_rate": 4.4231600137464305e-07, "loss": 0.299, "step": 4216 }, { "epoch": 2.72, "learning_rate": 4.40266802901681e-07, "loss": 0.306, "step": 4217 }, { "epoch": 2.72, "learning_rate": 4.3822225545438245e-07, "loss": 0.3033, "step": 4218 }, { "epoch": 2.72, "learning_rate": 4.3618236002746395e-07, "loss": 0.2973, "step": 4219 }, { "epoch": 2.72, "learning_rate": 4.341471176133838e-07, "loss": 0.3008, "step": 4220 }, { "epoch": 2.73, "learning_rate": 4.3211652920232996e-07, "loss": 0.3084, "step": 4221 }, { "epoch": 2.73, "learning_rate": 4.30090595782231e-07, "loss": 0.309, "step": 4222 }, { "epoch": 2.73, "learning_rate": 4.2806931833874855e-07, "loss": 0.2955, "step": 4223 }, { "epoch": 2.73, "learning_rate": 4.2605269785528037e-07, "loss": 0.2871, "step": 4224 }, { "epoch": 2.73, "learning_rate": 4.240407353129572e-07, "loss": 0.2717, "step": 4225 }, { "epoch": 2.73, "learning_rate": 4.220334316906438e-07, "loss": 0.3007, "step": 4226 }, { "epoch": 2.73, "learning_rate": 4.200307879649401e-07, "loss": 0.3038, "step": 4227 }, { "epoch": 2.73, "learning_rate": 4.1803280511017564e-07, "loss": 0.2785, "step": 4228 }, { "epoch": 2.73, "learning_rate": 4.16039484098415e-07, "loss": 0.316, "step": 4229 }, { "epoch": 2.73, "learning_rate": 4.1405082589945466e-07, "loss": 0.3051, "step": 4230 }, { "epoch": 2.73, "learning_rate": 4.120668314808185e-07, "loss": 0.2853, "step": 4231 }, { "epoch": 2.73, "learning_rate": 4.100875018077688e-07, "loss": 0.2984, "step": 4232 }, { "epoch": 2.73, "learning_rate": 4.081128378432919e-07, "loss": 0.3003, "step": 4233 }, { "epoch": 2.73, "learning_rate": 4.0614284054810606e-07, "loss": 0.2923, "step": 4234 }, { "epoch": 2.73, "learning_rate": 4.041775108806589e-07, "loss": 0.3054, "step": 4235 }, { "epoch": 2.73, "learning_rate": 4.0221684979712906e-07, "loss": 0.3036, "step": 4236 }, { "epoch": 2.74, "learning_rate": 4.0026085825142225e-07, "loss": 0.2798, "step": 4237 }, { "epoch": 2.74, "learning_rate": 3.983095371951706e-07, "loss": 0.3005, "step": 4238 }, { "epoch": 2.74, "learning_rate": 3.9636288757773923e-07, "loss": 0.2905, "step": 4239 }, { "epoch": 2.74, "learning_rate": 3.9442091034621156e-07, "loss": 0.2943, "step": 4240 }, { "epoch": 2.74, "learning_rate": 3.924836064454107e-07, "loss": 0.2937, "step": 4241 }, { "epoch": 2.74, "learning_rate": 3.9055097681787056e-07, "loss": 0.3002, "step": 4242 }, { "epoch": 2.74, "learning_rate": 3.8862302240386674e-07, "loss": 0.2886, "step": 4243 }, { "epoch": 2.74, "learning_rate": 3.8669974414138553e-07, "loss": 0.3116, "step": 4244 }, { "epoch": 2.74, "learning_rate": 3.8478114296614965e-07, "loss": 0.2893, "step": 4245 }, { "epoch": 2.74, "learning_rate": 3.8286721981160125e-07, "loss": 0.2982, "step": 4246 }, { "epoch": 2.74, "learning_rate": 3.8095797560890547e-07, "loss": 0.3013, "step": 4247 }, { "epoch": 2.74, "learning_rate": 3.7905341128695484e-07, "loss": 0.3037, "step": 4248 }, { "epoch": 2.74, "learning_rate": 3.7715352777236037e-07, "loss": 0.2999, "step": 4249 }, { "epoch": 2.74, "learning_rate": 3.752583259894604e-07, "loss": 0.3012, "step": 4250 }, { "epoch": 2.74, "learning_rate": 3.733678068603108e-07, "loss": 0.2963, "step": 4251 }, { "epoch": 2.75, "learning_rate": 3.7148197130469574e-07, "loss": 0.2874, "step": 4252 }, { "epoch": 2.75, "learning_rate": 3.6960082024011244e-07, "loss": 0.2993, "step": 4253 }, { "epoch": 2.75, "learning_rate": 3.6772435458178546e-07, "loss": 0.2887, "step": 4254 }, { "epoch": 2.75, "learning_rate": 3.658525752426578e-07, "loss": 0.2916, "step": 4255 }, { "epoch": 2.75, "learning_rate": 3.639854831333911e-07, "loss": 0.3182, "step": 4256 }, { "epoch": 2.75, "learning_rate": 3.6212307916236866e-07, "loss": 0.3078, "step": 4257 }, { "epoch": 2.75, "learning_rate": 3.6026536423569126e-07, "loss": 0.3005, "step": 4258 }, { "epoch": 2.75, "learning_rate": 3.5841233925718144e-07, "loss": 0.2915, "step": 4259 }, { "epoch": 2.75, "learning_rate": 3.5656400512837365e-07, "loss": 0.3029, "step": 4260 }, { "epoch": 2.75, "learning_rate": 3.547203627485274e-07, "loss": 0.3185, "step": 4261 }, { "epoch": 2.75, "learning_rate": 3.528814130146141e-07, "loss": 0.299, "step": 4262 }, { "epoch": 2.75, "learning_rate": 3.5104715682132583e-07, "loss": 0.2959, "step": 4263 }, { "epoch": 2.75, "learning_rate": 3.4921759506106876e-07, "loss": 0.2838, "step": 4264 }, { "epoch": 2.75, "learning_rate": 3.4739272862396644e-07, "loss": 0.2814, "step": 4265 }, { "epoch": 2.75, "learning_rate": 3.455725583978575e-07, "loss": 0.3173, "step": 4266 }, { "epoch": 2.75, "learning_rate": 3.437570852682959e-07, "loss": 0.3082, "step": 4267 }, { "epoch": 2.76, "learning_rate": 3.4194631011854827e-07, "loss": 0.2863, "step": 4268 }, { "epoch": 2.76, "learning_rate": 3.401402338295989e-07, "loss": 0.2865, "step": 4269 }, { "epoch": 2.76, "learning_rate": 3.3833885728014713e-07, "loss": 0.2891, "step": 4270 }, { "epoch": 2.76, "learning_rate": 3.3654218134659857e-07, "loss": 0.2736, "step": 4271 }, { "epoch": 2.76, "learning_rate": 3.347502069030795e-07, "loss": 0.3129, "step": 4272 }, { "epoch": 2.76, "learning_rate": 3.3296293482142586e-07, "loss": 0.2921, "step": 4273 }, { "epoch": 2.76, "learning_rate": 3.311803659711854e-07, "loss": 0.3161, "step": 4274 }, { "epoch": 2.76, "learning_rate": 3.294025012196178e-07, "loss": 0.3016, "step": 4275 }, { "epoch": 2.76, "learning_rate": 3.2762934143169333e-07, "loss": 0.3014, "step": 4276 }, { "epoch": 2.76, "learning_rate": 3.2586088747009637e-07, "loss": 0.3061, "step": 4277 }, { "epoch": 2.76, "learning_rate": 3.2409714019521865e-07, "loss": 0.3125, "step": 4278 }, { "epoch": 2.76, "learning_rate": 3.2233810046516154e-07, "loss": 0.2793, "step": 4279 }, { "epoch": 2.76, "learning_rate": 3.205837691357405e-07, "loss": 0.3116, "step": 4280 }, { "epoch": 2.76, "learning_rate": 3.1883414706047276e-07, "loss": 0.2996, "step": 4281 }, { "epoch": 2.76, "learning_rate": 3.1708923509059406e-07, "loss": 0.299, "step": 4282 }, { "epoch": 2.77, "learning_rate": 3.153490340750409e-07, "loss": 0.296, "step": 4283 }, { "epoch": 2.77, "learning_rate": 3.136135448604594e-07, "loss": 0.3085, "step": 4284 }, { "epoch": 2.77, "learning_rate": 3.118827682912062e-07, "loss": 0.309, "step": 4285 }, { "epoch": 2.77, "learning_rate": 3.1015670520934237e-07, "loss": 0.303, "step": 4286 }, { "epoch": 2.77, "learning_rate": 3.0843535645463606e-07, "loss": 0.3137, "step": 4287 }, { "epoch": 2.77, "learning_rate": 3.067187228645618e-07, "loss": 0.3002, "step": 4288 }, { "epoch": 2.77, "learning_rate": 3.050068052743016e-07, "loss": 0.2956, "step": 4289 }, { "epoch": 2.77, "learning_rate": 3.0329960451674025e-07, "loss": 0.2945, "step": 4290 }, { "epoch": 2.77, "learning_rate": 3.0159712142247334e-07, "loss": 0.3028, "step": 4291 }, { "epoch": 2.77, "learning_rate": 2.9989935681979165e-07, "loss": 0.3083, "step": 4292 }, { "epoch": 2.77, "learning_rate": 2.982063115347011e-07, "loss": 0.2998, "step": 4293 }, { "epoch": 2.77, "learning_rate": 2.965179863909029e-07, "loss": 0.2961, "step": 4294 }, { "epoch": 2.77, "learning_rate": 2.9483438220980653e-07, "loss": 0.303, "step": 4295 }, { "epoch": 2.77, "learning_rate": 2.931554998105235e-07, "loss": 0.3155, "step": 4296 }, { "epoch": 2.77, "learning_rate": 2.9148134000986815e-07, "loss": 0.316, "step": 4297 }, { "epoch": 2.77, "learning_rate": 2.8981190362235676e-07, "loss": 0.306, "step": 4298 }, { "epoch": 2.78, "learning_rate": 2.881471914602063e-07, "loss": 0.3057, "step": 4299 }, { "epoch": 2.78, "learning_rate": 2.8648720433334e-07, "loss": 0.2961, "step": 4300 }, { "epoch": 2.78, "learning_rate": 2.8483194304937514e-07, "loss": 0.2996, "step": 4301 }, { "epoch": 2.78, "learning_rate": 2.831814084136375e-07, "loss": 0.313, "step": 4302 }, { "epoch": 2.78, "learning_rate": 2.815356012291459e-07, "loss": 0.2946, "step": 4303 }, { "epoch": 2.78, "learning_rate": 2.798945222966265e-07, "loss": 0.2995, "step": 4304 }, { "epoch": 2.78, "learning_rate": 2.7825817241449615e-07, "loss": 0.2943, "step": 4305 }, { "epoch": 2.78, "learning_rate": 2.7662655237887916e-07, "loss": 0.2989, "step": 4306 }, { "epoch": 2.78, "learning_rate": 2.7499966298359605e-07, "loss": 0.3104, "step": 4307 }, { "epoch": 2.78, "learning_rate": 2.733775050201626e-07, "loss": 0.2853, "step": 4308 }, { "epoch": 2.78, "learning_rate": 2.7176007927779855e-07, "loss": 0.3068, "step": 4309 }, { "epoch": 2.78, "learning_rate": 2.7014738654341453e-07, "loss": 0.2891, "step": 4310 }, { "epoch": 2.78, "learning_rate": 2.685394276016251e-07, "loss": 0.2935, "step": 4311 }, { "epoch": 2.78, "learning_rate": 2.6693620323473556e-07, "loss": 0.3183, "step": 4312 }, { "epoch": 2.78, "learning_rate": 2.6533771422275313e-07, "loss": 0.2923, "step": 4313 }, { "epoch": 2.79, "learning_rate": 2.6374396134337675e-07, "loss": 0.3095, "step": 4314 }, { "epoch": 2.79, "learning_rate": 2.6215494537200405e-07, "loss": 0.2891, "step": 4315 }, { "epoch": 2.79, "learning_rate": 2.605706670817276e-07, "loss": 0.3114, "step": 4316 }, { "epoch": 2.79, "learning_rate": 2.5899112724333433e-07, "loss": 0.2959, "step": 4317 }, { "epoch": 2.79, "learning_rate": 2.574163266253049e-07, "loss": 0.3064, "step": 4318 }, { "epoch": 2.79, "learning_rate": 2.5584626599381545e-07, "loss": 0.3072, "step": 4319 }, { "epoch": 2.79, "learning_rate": 2.5428094611273825e-07, "loss": 0.3038, "step": 4320 }, { "epoch": 2.79, "learning_rate": 2.5272036774363406e-07, "loss": 0.3049, "step": 4321 }, { "epoch": 2.79, "learning_rate": 2.5116453164576114e-07, "loss": 0.3139, "step": 4322 }, { "epoch": 2.79, "learning_rate": 2.4961343857606626e-07, "loss": 0.2964, "step": 4323 }, { "epoch": 2.79, "learning_rate": 2.480670892891934e-07, "loss": 0.3055, "step": 4324 }, { "epoch": 2.79, "learning_rate": 2.465254845374765e-07, "loss": 0.3018, "step": 4325 }, { "epoch": 2.79, "learning_rate": 2.4498862507094103e-07, "loss": 0.3024, "step": 4326 }, { "epoch": 2.79, "learning_rate": 2.434565116373011e-07, "loss": 0.3133, "step": 4327 }, { "epoch": 2.79, "learning_rate": 2.419291449819683e-07, "loss": 0.3056, "step": 4328 }, { "epoch": 2.79, "learning_rate": 2.404065258480381e-07, "loss": 0.2986, "step": 4329 }, { "epoch": 2.8, "learning_rate": 2.388886549763003e-07, "loss": 0.3129, "step": 4330 }, { "epoch": 2.8, "learning_rate": 2.3737553310523187e-07, "loss": 0.3041, "step": 4331 }, { "epoch": 2.8, "learning_rate": 2.358671609710017e-07, "loss": 0.2801, "step": 4332 }, { "epoch": 2.8, "learning_rate": 2.3436353930746837e-07, "loss": 0.3095, "step": 4333 }, { "epoch": 2.8, "learning_rate": 2.3286466884617554e-07, "loss": 0.3133, "step": 4334 }, { "epoch": 2.8, "learning_rate": 2.313705503163577e-07, "loss": 0.2996, "step": 4335 }, { "epoch": 2.8, "learning_rate": 2.298811844449389e-07, "loss": 0.2975, "step": 4336 }, { "epoch": 2.8, "learning_rate": 2.2839657195652732e-07, "loss": 0.2854, "step": 4337 }, { "epoch": 2.8, "learning_rate": 2.2691671357342072e-07, "loss": 0.2853, "step": 4338 }, { "epoch": 2.8, "learning_rate": 2.2544161001560428e-07, "loss": 0.2866, "step": 4339 }, { "epoch": 2.8, "learning_rate": 2.2397126200074837e-07, "loss": 0.3001, "step": 4340 }, { "epoch": 2.8, "learning_rate": 2.2250567024421076e-07, "loss": 0.3006, "step": 4341 }, { "epoch": 2.8, "learning_rate": 2.2104483545903443e-07, "loss": 0.2879, "step": 4342 }, { "epoch": 2.8, "learning_rate": 2.1958875835595084e-07, "loss": 0.3, "step": 4343 }, { "epoch": 2.8, "learning_rate": 2.1813743964336998e-07, "loss": 0.3114, "step": 4344 }, { "epoch": 2.81, "learning_rate": 2.166908800273948e-07, "loss": 0.3049, "step": 4345 }, { "epoch": 2.81, "learning_rate": 2.152490802118079e-07, "loss": 0.3168, "step": 4346 }, { "epoch": 2.81, "learning_rate": 2.1381204089807705e-07, "loss": 0.3067, "step": 4347 }, { "epoch": 2.81, "learning_rate": 2.1237976278535522e-07, "loss": 0.2909, "step": 4348 }, { "epoch": 2.81, "learning_rate": 2.109522465704761e-07, "loss": 0.3045, "step": 4349 }, { "epoch": 2.81, "learning_rate": 2.0952949294796077e-07, "loss": 0.2971, "step": 4350 }, { "epoch": 2.81, "learning_rate": 2.081115026100089e-07, "loss": 0.2949, "step": 4351 }, { "epoch": 2.81, "learning_rate": 2.066982762465075e-07, "loss": 0.2906, "step": 4352 }, { "epoch": 2.81, "learning_rate": 2.0528981454501996e-07, "loss": 0.3032, "step": 4353 }, { "epoch": 2.81, "learning_rate": 2.0388611819079696e-07, "loss": 0.3048, "step": 4354 }, { "epoch": 2.81, "learning_rate": 2.0248718786676558e-07, "loss": 0.2891, "step": 4355 }, { "epoch": 2.81, "learning_rate": 2.0109302425354139e-07, "loss": 0.3064, "step": 4356 }, { "epoch": 2.81, "learning_rate": 1.997036280294118e-07, "loss": 0.3008, "step": 4357 }, { "epoch": 2.81, "learning_rate": 1.9831899987035276e-07, "loss": 0.3013, "step": 4358 }, { "epoch": 2.81, "learning_rate": 1.9693914045001428e-07, "loss": 0.3024, "step": 4359 }, { "epoch": 2.81, "learning_rate": 1.9556405043973158e-07, "loss": 0.3067, "step": 4360 }, { "epoch": 2.82, "learning_rate": 1.941937305085162e-07, "loss": 0.2802, "step": 4361 }, { "epoch": 2.82, "learning_rate": 1.9282818132305813e-07, "loss": 0.2802, "step": 4362 }, { "epoch": 2.82, "learning_rate": 1.9146740354773042e-07, "loss": 0.2896, "step": 4363 }, { "epoch": 2.82, "learning_rate": 1.901113978445801e-07, "loss": 0.2935, "step": 4364 }, { "epoch": 2.82, "learning_rate": 1.8876016487333614e-07, "loss": 0.294, "step": 4365 }, { "epoch": 2.82, "learning_rate": 1.8741370529140046e-07, "loss": 0.2865, "step": 4366 }, { "epoch": 2.82, "learning_rate": 1.8607201975386013e-07, "loss": 0.3005, "step": 4367 }, { "epoch": 2.82, "learning_rate": 1.8473510891347412e-07, "loss": 0.2965, "step": 4368 }, { "epoch": 2.82, "learning_rate": 1.834029734206777e-07, "loss": 0.3018, "step": 4369 }, { "epoch": 2.82, "learning_rate": 1.8207561392358796e-07, "loss": 0.2905, "step": 4370 }, { "epoch": 2.82, "learning_rate": 1.807530310679917e-07, "loss": 0.2886, "step": 4371 }, { "epoch": 2.82, "learning_rate": 1.794352254973597e-07, "loss": 0.2915, "step": 4372 }, { "epoch": 2.82, "learning_rate": 1.7812219785282913e-07, "loss": 0.2797, "step": 4373 }, { "epoch": 2.82, "learning_rate": 1.7681394877322234e-07, "loss": 0.2963, "step": 4374 }, { "epoch": 2.82, "learning_rate": 1.7551047889502793e-07, "loss": 0.3098, "step": 4375 }, { "epoch": 2.83, "learning_rate": 1.742117888524153e-07, "loss": 0.3025, "step": 4376 }, { "epoch": 2.83, "learning_rate": 1.7291787927722681e-07, "loss": 0.284, "step": 4377 }, { "epoch": 2.83, "learning_rate": 1.7162875079897778e-07, "loss": 0.3017, "step": 4378 }, { "epoch": 2.83, "learning_rate": 1.7034440404485764e-07, "loss": 0.3071, "step": 4379 }, { "epoch": 2.83, "learning_rate": 1.6906483963973207e-07, "loss": 0.3014, "step": 4380 }, { "epoch": 2.83, "learning_rate": 1.6779005820613647e-07, "loss": 0.3099, "step": 4381 }, { "epoch": 2.83, "learning_rate": 1.6652006036428025e-07, "loss": 0.2914, "step": 4382 }, { "epoch": 2.83, "learning_rate": 1.6525484673204807e-07, "loss": 0.3263, "step": 4383 }, { "epoch": 2.83, "learning_rate": 1.6399441792499305e-07, "loss": 0.2979, "step": 4384 }, { "epoch": 2.83, "learning_rate": 1.6273877455634245e-07, "loss": 0.3044, "step": 4385 }, { "epoch": 2.83, "learning_rate": 1.6148791723699652e-07, "loss": 0.2965, "step": 4386 }, { "epoch": 2.83, "learning_rate": 1.6024184657552398e-07, "loss": 0.2921, "step": 4387 }, { "epoch": 2.83, "learning_rate": 1.590005631781666e-07, "loss": 0.2919, "step": 4388 }, { "epoch": 2.83, "learning_rate": 1.5776406764883678e-07, "loss": 0.2826, "step": 4389 }, { "epoch": 2.83, "learning_rate": 1.565323605891178e-07, "loss": 0.2985, "step": 4390 }, { "epoch": 2.83, "learning_rate": 1.5530544259826254e-07, "loss": 0.3005, "step": 4391 }, { "epoch": 2.84, "learning_rate": 1.5408331427319345e-07, "loss": 0.2925, "step": 4392 }, { "epoch": 2.84, "learning_rate": 1.5286597620850606e-07, "loss": 0.3141, "step": 4393 }, { "epoch": 2.84, "learning_rate": 1.5165342899645997e-07, "loss": 0.3006, "step": 4394 }, { "epoch": 2.84, "learning_rate": 1.5044567322698768e-07, "loss": 0.3141, "step": 4395 }, { "epoch": 2.84, "learning_rate": 1.4924270948769027e-07, "loss": 0.2971, "step": 4396 }, { "epoch": 2.84, "learning_rate": 1.4804453836383736e-07, "loss": 0.3082, "step": 4397 }, { "epoch": 2.84, "learning_rate": 1.468511604383638e-07, "loss": 0.3046, "step": 4398 }, { "epoch": 2.84, "learning_rate": 1.456625762918762e-07, "loss": 0.3287, "step": 4399 }, { "epoch": 2.84, "learning_rate": 1.4447878650264867e-07, "loss": 0.332, "step": 4400 }, { "epoch": 2.84, "learning_rate": 1.4329979164661945e-07, "loss": 0.3079, "step": 4401 }, { "epoch": 2.84, "learning_rate": 1.421255922973974e-07, "loss": 0.3143, "step": 4402 }, { "epoch": 2.84, "learning_rate": 1.4095618902625562e-07, "loss": 0.302, "step": 4403 }, { "epoch": 2.84, "learning_rate": 1.3979158240213787e-07, "loss": 0.3018, "step": 4404 }, { "epoch": 2.84, "learning_rate": 1.3863177299164864e-07, "loss": 0.2903, "step": 4405 }, { "epoch": 2.84, "learning_rate": 1.374767613590644e-07, "loss": 0.2941, "step": 4406 }, { "epoch": 2.85, "learning_rate": 1.3632654806632006e-07, "loss": 0.3139, "step": 4407 }, { "epoch": 2.85, "learning_rate": 1.3518113367302356e-07, "loss": 0.2961, "step": 4408 }, { "epoch": 2.85, "learning_rate": 1.3404051873644464e-07, "loss": 0.2862, "step": 4409 }, { "epoch": 2.85, "learning_rate": 1.3290470381151722e-07, "loss": 0.3025, "step": 4410 }, { "epoch": 2.85, "learning_rate": 1.3177368945084145e-07, "loss": 0.3007, "step": 4411 }, { "epoch": 2.85, "learning_rate": 1.3064747620468053e-07, "loss": 0.2875, "step": 4412 }, { "epoch": 2.85, "learning_rate": 1.2952606462096394e-07, "loss": 0.2918, "step": 4413 }, { "epoch": 2.85, "learning_rate": 1.2840945524528193e-07, "loss": 0.291, "step": 4414 }, { "epoch": 2.85, "learning_rate": 1.2729764862089212e-07, "loss": 0.2957, "step": 4415 }, { "epoch": 2.85, "learning_rate": 1.2619064528871185e-07, "loss": 0.3072, "step": 4416 }, { "epoch": 2.85, "learning_rate": 1.250884457873247e-07, "loss": 0.2874, "step": 4417 }, { "epoch": 2.85, "learning_rate": 1.239910506529729e-07, "loss": 0.3008, "step": 4418 }, { "epoch": 2.85, "learning_rate": 1.2289846041956599e-07, "loss": 0.3123, "step": 4419 }, { "epoch": 2.85, "learning_rate": 1.218106756186743e-07, "loss": 0.2929, "step": 4420 }, { "epoch": 2.85, "learning_rate": 1.207276967795279e-07, "loss": 0.3209, "step": 4421 }, { "epoch": 2.85, "learning_rate": 1.1964952442902188e-07, "loss": 0.2987, "step": 4422 }, { "epoch": 2.86, "learning_rate": 1.1857615909171005e-07, "loss": 0.3066, "step": 4423 }, { "epoch": 2.86, "learning_rate": 1.1750760128981131e-07, "loss": 0.3161, "step": 4424 }, { "epoch": 2.86, "learning_rate": 1.1644385154319982e-07, "loss": 0.2784, "step": 4425 }, { "epoch": 2.86, "learning_rate": 1.1538491036941602e-07, "loss": 0.2964, "step": 4426 }, { "epoch": 2.86, "learning_rate": 1.1433077828365779e-07, "loss": 0.3017, "step": 4427 }, { "epoch": 2.86, "learning_rate": 1.132814557987838e-07, "loss": 0.2915, "step": 4428 }, { "epoch": 2.86, "learning_rate": 1.1223694342531455e-07, "loss": 0.3055, "step": 4429 }, { "epoch": 2.86, "learning_rate": 1.1119724167142687e-07, "loss": 0.3179, "step": 4430 }, { "epoch": 2.86, "learning_rate": 1.1016235104296169e-07, "loss": 0.3062, "step": 4431 }, { "epoch": 2.86, "learning_rate": 1.0913227204341292e-07, "loss": 0.297, "step": 4432 }, { "epoch": 2.86, "learning_rate": 1.081070051739419e-07, "loss": 0.2931, "step": 4433 }, { "epoch": 2.86, "learning_rate": 1.0708655093335963e-07, "loss": 0.2784, "step": 4434 }, { "epoch": 2.86, "learning_rate": 1.0607090981814339e-07, "loss": 0.2971, "step": 4435 }, { "epoch": 2.86, "learning_rate": 1.0506008232242348e-07, "loss": 0.2979, "step": 4436 }, { "epoch": 2.86, "learning_rate": 1.0405406893799097e-07, "loss": 0.2964, "step": 4437 }, { "epoch": 2.87, "learning_rate": 1.0305287015429433e-07, "loss": 0.2853, "step": 4438 }, { "epoch": 2.87, "learning_rate": 1.0205648645843836e-07, "loss": 0.2873, "step": 4439 }, { "epoch": 2.87, "learning_rate": 1.010649183351875e-07, "loss": 0.2933, "step": 4440 }, { "epoch": 2.87, "learning_rate": 1.0007816626696032e-07, "loss": 0.3167, "step": 4441 }, { "epoch": 2.87, "learning_rate": 9.909623073383612e-08, "loss": 0.2896, "step": 4442 }, { "epoch": 2.87, "learning_rate": 9.81191122135472e-08, "loss": 0.3029, "step": 4443 }, { "epoch": 2.87, "learning_rate": 9.714681118148329e-08, "loss": 0.3041, "step": 4444 }, { "epoch": 2.87, "learning_rate": 9.617932811069153e-08, "loss": 0.292, "step": 4445 }, { "epoch": 2.87, "learning_rate": 9.52166634718743e-08, "loss": 0.3107, "step": 4446 }, { "epoch": 2.87, "learning_rate": 9.425881773339029e-08, "loss": 0.3111, "step": 4447 }, { "epoch": 2.87, "learning_rate": 9.330579136125117e-08, "loss": 0.2941, "step": 4448 }, { "epoch": 2.87, "learning_rate": 9.235758481912715e-08, "loss": 0.2772, "step": 4449 }, { "epoch": 2.87, "learning_rate": 9.141419856834255e-08, "loss": 0.3016, "step": 4450 }, { "epoch": 2.87, "learning_rate": 9.047563306787466e-08, "loss": 0.2948, "step": 4451 }, { "epoch": 2.87, "learning_rate": 8.95418887743571e-08, "loss": 0.3023, "step": 4452 }, { "epoch": 2.87, "learning_rate": 8.861296614207871e-08, "loss": 0.3044, "step": 4453 }, { "epoch": 2.88, "learning_rate": 8.768886562298018e-08, "loss": 0.294, "step": 4454 }, { "epoch": 2.88, "learning_rate": 8.676958766665633e-08, "loss": 0.3054, "step": 4455 }, { "epoch": 2.88, "learning_rate": 8.58551327203594e-08, "loss": 0.2869, "step": 4456 }, { "epoch": 2.88, "learning_rate": 8.494550122898903e-08, "loss": 0.3104, "step": 4457 }, { "epoch": 2.88, "learning_rate": 8.404069363510348e-08, "loss": 0.3115, "step": 4458 }, { "epoch": 2.88, "learning_rate": 8.314071037891169e-08, "loss": 0.2963, "step": 4459 }, { "epoch": 2.88, "learning_rate": 8.224555189827565e-08, "loss": 0.2967, "step": 4460 }, { "epoch": 2.88, "learning_rate": 8.135521862871144e-08, "loss": 0.3008, "step": 4461 }, { "epoch": 2.88, "learning_rate": 8.046971100338363e-08, "loss": 0.3162, "step": 4462 }, { "epoch": 2.88, "learning_rate": 7.958902945311319e-08, "loss": 0.2936, "step": 4463 }, { "epoch": 2.88, "learning_rate": 7.871317440637072e-08, "loss": 0.2969, "step": 4464 }, { "epoch": 2.88, "learning_rate": 7.784214628928199e-08, "loss": 0.289, "step": 4465 }, { "epoch": 2.88, "learning_rate": 7.697594552561805e-08, "loss": 0.2899, "step": 4466 }, { "epoch": 2.88, "learning_rate": 7.611457253680843e-08, "loss": 0.2929, "step": 4467 }, { "epoch": 2.88, "learning_rate": 7.525802774192791e-08, "loss": 0.302, "step": 4468 }, { "epoch": 2.89, "learning_rate": 7.44063115577065e-08, "loss": 0.2988, "step": 4469 }, { "epoch": 2.89, "learning_rate": 7.355942439852271e-08, "loss": 0.2958, "step": 4470 }, { "epoch": 2.89, "learning_rate": 7.271736667640694e-08, "loss": 0.288, "step": 4471 }, { "epoch": 2.89, "learning_rate": 7.188013880103817e-08, "loss": 0.2978, "step": 4472 }, { "epoch": 2.89, "learning_rate": 7.104774117974611e-08, "loss": 0.2877, "step": 4473 }, { "epoch": 2.89, "learning_rate": 7.022017421751348e-08, "loss": 0.32, "step": 4474 }, { "epoch": 2.89, "learning_rate": 6.93974383169671e-08, "loss": 0.3205, "step": 4475 }, { "epoch": 2.89, "learning_rate": 6.857953387839012e-08, "loss": 0.3091, "step": 4476 }, { "epoch": 2.89, "learning_rate": 6.776646129970754e-08, "loss": 0.2955, "step": 4477 }, { "epoch": 2.89, "learning_rate": 6.695822097650073e-08, "loss": 0.2925, "step": 4478 }, { "epoch": 2.89, "learning_rate": 6.615481330199514e-08, "loss": 0.2915, "step": 4479 }, { "epoch": 2.89, "learning_rate": 6.535623866706698e-08, "loss": 0.2925, "step": 4480 }, { "epoch": 2.89, "learning_rate": 6.456249746024101e-08, "loss": 0.313, "step": 4481 }, { "epoch": 2.89, "learning_rate": 6.377359006769057e-08, "loss": 0.2945, "step": 4482 }, { "epoch": 2.89, "learning_rate": 6.29895168732364e-08, "loss": 0.3075, "step": 4483 }, { "epoch": 2.9, "learning_rate": 6.22102782583478e-08, "loss": 0.3127, "step": 4484 }, { "epoch": 2.9, "learning_rate": 6.143587460214151e-08, "loss": 0.3005, "step": 4485 }, { "epoch": 2.9, "learning_rate": 6.066630628138282e-08, "loss": 0.277, "step": 4486 }, { "epoch": 2.9, "learning_rate": 5.990157367048554e-08, "loss": 0.2888, "step": 4487 }, { "epoch": 2.9, "learning_rate": 5.9141677141506536e-08, "loss": 0.2997, "step": 4488 }, { "epoch": 2.9, "learning_rate": 5.838661706415338e-08, "loss": 0.3221, "step": 4489 }, { "epoch": 2.9, "learning_rate": 5.763639380578112e-08, "loss": 0.2951, "step": 4490 }, { "epoch": 2.9, "learning_rate": 5.689100773139e-08, "loss": 0.3105, "step": 4491 }, { "epoch": 2.9, "learning_rate": 5.61504592036255e-08, "loss": 0.3079, "step": 4492 }, { "epoch": 2.9, "learning_rate": 5.5414748582783836e-08, "loss": 0.3082, "step": 4493 }, { "epoch": 2.9, "learning_rate": 5.468387622680205e-08, "loss": 0.3004, "step": 4494 }, { "epoch": 2.9, "learning_rate": 5.3957842491267895e-08, "loss": 0.306, "step": 4495 }, { "epoch": 2.9, "learning_rate": 5.323664772941217e-08, "loss": 0.2896, "step": 4496 }, { "epoch": 2.9, "learning_rate": 5.2520292292113086e-08, "loss": 0.2835, "step": 4497 }, { "epoch": 2.9, "learning_rate": 5.1808776527892955e-08, "loss": 0.3021, "step": 4498 }, { "epoch": 2.9, "learning_rate": 5.1102100782920437e-08, "loss": 0.3068, "step": 4499 }, { "epoch": 2.91, "learning_rate": 5.0400265401009395e-08, "loss": 0.2974, "step": 4500 }, { "epoch": 2.91, "learning_rate": 4.9703270723618915e-08, "loss": 0.3026, "step": 4501 }, { "epoch": 2.91, "learning_rate": 4.901111708985329e-08, "loss": 0.2951, "step": 4502 }, { "epoch": 2.91, "learning_rate": 4.832380483645871e-08, "loss": 0.2998, "step": 4503 }, { "epoch": 2.91, "learning_rate": 4.76413342978288e-08, "loss": 0.2932, "step": 4504 }, { "epoch": 2.91, "learning_rate": 4.6963705806002405e-08, "loss": 0.2933, "step": 4505 }, { "epoch": 2.91, "learning_rate": 4.629091969065913e-08, "loss": 0.2996, "step": 4506 }, { "epoch": 2.91, "learning_rate": 4.562297627912493e-08, "loss": 0.3225, "step": 4507 }, { "epoch": 2.91, "learning_rate": 4.4959875896370965e-08, "loss": 0.2992, "step": 4508 }, { "epoch": 2.91, "learning_rate": 4.430161886500806e-08, "loss": 0.295, "step": 4509 }, { "epoch": 2.91, "learning_rate": 4.364820550529558e-08, "loss": 0.2888, "step": 4510 }, { "epoch": 2.91, "learning_rate": 4.2999636135132583e-08, "loss": 0.3104, "step": 4511 }, { "epoch": 2.91, "learning_rate": 4.2355911070062205e-08, "loss": 0.2878, "step": 4512 }, { "epoch": 2.91, "learning_rate": 4.17170306232717e-08, "loss": 0.2824, "step": 4513 }, { "epoch": 2.91, "learning_rate": 4.108299510559022e-08, "loss": 0.3008, "step": 4514 }, { "epoch": 2.92, "learning_rate": 4.04538048254921e-08, "loss": 0.2899, "step": 4515 }, { "epoch": 2.92, "learning_rate": 3.982946008909139e-08, "loss": 0.2858, "step": 4516 }, { "epoch": 2.92, "learning_rate": 3.920996120014509e-08, "loss": 0.2956, "step": 4517 }, { "epoch": 2.92, "learning_rate": 3.8595308460054326e-08, "loss": 0.3069, "step": 4518 }, { "epoch": 2.92, "learning_rate": 3.798550216786212e-08, "loss": 0.2932, "step": 4519 }, { "epoch": 2.92, "learning_rate": 3.738054262025226e-08, "loss": 0.291, "step": 4520 }, { "epoch": 2.92, "learning_rate": 3.6780430111550415e-08, "loss": 0.2915, "step": 4521 }, { "epoch": 2.92, "learning_rate": 3.618516493372637e-08, "loss": 0.3108, "step": 4522 }, { "epoch": 2.92, "learning_rate": 3.559474737638846e-08, "loss": 0.2873, "step": 4523 }, { "epoch": 2.92, "learning_rate": 3.500917772679025e-08, "loss": 0.2998, "step": 4524 }, { "epoch": 2.92, "learning_rate": 3.4428456269821606e-08, "loss": 0.3022, "step": 4525 }, { "epoch": 2.92, "learning_rate": 3.385258328801988e-08, "loss": 0.3136, "step": 4526 }, { "epoch": 2.92, "learning_rate": 3.328155906155761e-08, "loss": 0.2879, "step": 4527 }, { "epoch": 2.92, "learning_rate": 3.271538386825257e-08, "loss": 0.3163, "step": 4528 }, { "epoch": 2.92, "learning_rate": 3.215405798356108e-08, "loss": 0.3093, "step": 4529 }, { "epoch": 2.92, "learning_rate": 3.1597581680581356e-08, "loss": 0.2969, "step": 4530 }, { "epoch": 2.93, "learning_rate": 3.104595523005016e-08, "loss": 0.2944, "step": 4531 }, { "epoch": 2.93, "learning_rate": 3.049917890034837e-08, "loss": 0.284, "step": 4532 }, { "epoch": 2.93, "learning_rate": 2.9957252957493186e-08, "loss": 0.2875, "step": 4533 }, { "epoch": 2.93, "learning_rate": 2.9420177665144823e-08, "loss": 0.2936, "step": 4534 }, { "epoch": 2.93, "learning_rate": 2.888795328460314e-08, "loss": 0.3011, "step": 4535 }, { "epoch": 2.93, "learning_rate": 2.8360580074804355e-08, "loss": 0.2988, "step": 4536 }, { "epoch": 2.93, "learning_rate": 2.7838058292330995e-08, "loss": 0.2996, "step": 4537 }, { "epoch": 2.93, "learning_rate": 2.7320388191399704e-08, "loss": 0.3143, "step": 4538 }, { "epoch": 2.93, "learning_rate": 2.6807570023869024e-08, "loss": 0.2994, "step": 4539 }, { "epoch": 2.93, "learning_rate": 2.6299604039237147e-08, "loss": 0.2827, "step": 4540 }, { "epoch": 2.93, "learning_rate": 2.5796490484639723e-08, "loss": 0.2907, "step": 4541 }, { "epoch": 2.93, "learning_rate": 2.5298229604854286e-08, "loss": 0.3023, "step": 4542 }, { "epoch": 2.93, "learning_rate": 2.4804821642294696e-08, "loss": 0.3077, "step": 4543 }, { "epoch": 2.93, "learning_rate": 2.43162668370156e-08, "loss": 0.3281, "step": 4544 }, { "epoch": 2.93, "learning_rate": 2.3832565426709086e-08, "loss": 0.304, "step": 4545 }, { "epoch": 2.94, "learning_rate": 2.335371764670913e-08, "loss": 0.2929, "step": 4546 }, { "epoch": 2.94, "learning_rate": 2.2879723729983815e-08, "loss": 0.3033, "step": 4547 }, { "epoch": 2.94, "learning_rate": 2.2410583907142012e-08, "loss": 0.2927, "step": 4548 }, { "epoch": 2.94, "learning_rate": 2.1946298406432255e-08, "loss": 0.297, "step": 4549 }, { "epoch": 2.94, "learning_rate": 2.148686745373829e-08, "loss": 0.3102, "step": 4550 }, { "epoch": 2.94, "learning_rate": 2.1032291272584658e-08, "loss": 0.3118, "step": 4551 }, { "epoch": 2.94, "learning_rate": 2.0582570084132224e-08, "loss": 0.2975, "step": 4552 }, { "epoch": 2.94, "learning_rate": 2.013770410718152e-08, "loss": 0.2897, "step": 4553 }, { "epoch": 2.94, "learning_rate": 1.9697693558168305e-08, "loss": 0.2994, "step": 4554 }, { "epoch": 2.94, "learning_rate": 1.9262538651169115e-08, "loss": 0.2855, "step": 4555 }, { "epoch": 2.94, "learning_rate": 1.883223959789571e-08, "loss": 0.293, "step": 4556 }, { "epoch": 2.94, "learning_rate": 1.8406796607698397e-08, "loss": 0.291, "step": 4557 }, { "epoch": 2.94, "learning_rate": 1.7986209887564943e-08, "loss": 0.3181, "step": 4558 }, { "epoch": 2.94, "learning_rate": 1.757047964212055e-08, "loss": 0.3023, "step": 4559 }, { "epoch": 2.94, "learning_rate": 1.7159606073627875e-08, "loss": 0.2844, "step": 4560 }, { "epoch": 2.94, "learning_rate": 1.6753589381983682e-08, "loss": 0.3088, "step": 4561 }, { "epoch": 2.95, "learning_rate": 1.635242976472773e-08, "loss": 0.293, "step": 4562 }, { "epoch": 2.95, "learning_rate": 1.5956127417030565e-08, "loss": 0.2972, "step": 4563 }, { "epoch": 2.95, "learning_rate": 1.5564682531702402e-08, "loss": 0.3053, "step": 4564 }, { "epoch": 2.95, "learning_rate": 1.5178095299192008e-08, "loss": 0.2929, "step": 4565 }, { "epoch": 2.95, "learning_rate": 1.4796365907580046e-08, "loss": 0.288, "step": 4566 }, { "epoch": 2.95, "learning_rate": 1.4419494542589063e-08, "loss": 0.2975, "step": 4567 }, { "epoch": 2.95, "learning_rate": 1.4047481387573503e-08, "loss": 0.3034, "step": 4568 }, { "epoch": 2.95, "learning_rate": 1.3680326623526364e-08, "loss": 0.3051, "step": 4569 }, { "epoch": 2.95, "learning_rate": 1.3318030429078089e-08, "loss": 0.3001, "step": 4570 }, { "epoch": 2.95, "learning_rate": 1.2960592980493237e-08, "loss": 0.29, "step": 4571 }, { "epoch": 2.95, "learning_rate": 1.2608014451672701e-08, "loss": 0.2967, "step": 4572 }, { "epoch": 2.95, "learning_rate": 1.2260295014154822e-08, "loss": 0.313, "step": 4573 }, { "epoch": 2.95, "learning_rate": 1.1917434837112052e-08, "loss": 0.2893, "step": 4574 }, { "epoch": 2.95, "learning_rate": 1.1579434087354291e-08, "loss": 0.305, "step": 4575 }, { "epoch": 2.95, "learning_rate": 1.1246292929325552e-08, "loss": 0.2999, "step": 4576 }, { "epoch": 2.96, "learning_rate": 1.0918011525107297e-08, "loss": 0.308, "step": 4577 }, { "epoch": 2.96, "learning_rate": 1.0594590034415097e-08, "loss": 0.2825, "step": 4578 }, { "epoch": 2.96, "learning_rate": 1.0276028614601974e-08, "loss": 0.2825, "step": 4579 }, { "epoch": 2.96, "learning_rate": 9.96232742065506e-09, "loss": 0.3115, "step": 4580 }, { "epoch": 2.96, "learning_rate": 9.653486605195606e-09, "loss": 0.3016, "step": 4581 }, { "epoch": 2.96, "learning_rate": 9.349506318483414e-09, "loss": 0.2913, "step": 4582 }, { "epoch": 2.96, "learning_rate": 9.050386708410186e-09, "loss": 0.2935, "step": 4583 }, { "epoch": 2.96, "learning_rate": 8.756127920505065e-09, "loss": 0.3099, "step": 4584 }, { "epoch": 2.96, "learning_rate": 8.466730097932418e-09, "loss": 0.2966, "step": 4585 }, { "epoch": 2.96, "learning_rate": 8.182193381489622e-09, "loss": 0.2877, "step": 4586 }, { "epoch": 2.96, "learning_rate": 7.902517909611496e-09, "loss": 0.2963, "step": 4587 }, { "epoch": 2.96, "learning_rate": 7.627703818363642e-09, "loss": 0.3023, "step": 4588 }, { "epoch": 2.96, "learning_rate": 7.357751241452438e-09, "loss": 0.2918, "step": 4589 }, { "epoch": 2.96, "learning_rate": 7.092660310215049e-09, "loss": 0.3044, "step": 4590 }, { "epoch": 2.96, "learning_rate": 6.832431153623864e-09, "loss": 0.3092, "step": 4591 }, { "epoch": 2.96, "learning_rate": 6.577063898285385e-09, "loss": 0.3025, "step": 4592 }, { "epoch": 2.97, "learning_rate": 6.326558668442451e-09, "loss": 0.3097, "step": 4593 }, { "epoch": 2.97, "learning_rate": 6.080915585972014e-09, "loss": 0.308, "step": 4594 }, { "epoch": 2.97, "learning_rate": 5.840134770384032e-09, "loss": 0.2897, "step": 4595 }, { "epoch": 2.97, "learning_rate": 5.604216338824797e-09, "loss": 0.3153, "step": 4596 }, { "epoch": 2.97, "learning_rate": 5.373160406072498e-09, "loss": 0.3044, "step": 4597 }, { "epoch": 2.97, "learning_rate": 5.1469670845416545e-09, "loss": 0.2944, "step": 4598 }, { "epoch": 2.97, "learning_rate": 4.9256364842809045e-09, "loss": 0.2942, "step": 4599 }, { "epoch": 2.97, "learning_rate": 4.7091687129718896e-09, "loss": 0.296, "step": 4600 }, { "epoch": 2.97, "learning_rate": 4.497563875932587e-09, "loss": 0.3079, "step": 4601 }, { "epoch": 2.97, "learning_rate": 4.290822076111756e-09, "loss": 0.2971, "step": 4602 }, { "epoch": 2.97, "learning_rate": 4.088943414094493e-09, "loss": 0.2967, "step": 4603 }, { "epoch": 2.97, "learning_rate": 3.891927988098898e-09, "loss": 0.283, "step": 4604 }, { "epoch": 2.97, "learning_rate": 3.699775893978297e-09, "loss": 0.3107, "step": 4605 }, { "epoch": 2.97, "learning_rate": 3.512487225219019e-09, "loss": 0.2951, "step": 4606 }, { "epoch": 2.97, "learning_rate": 3.3300620729403987e-09, "loss": 0.2947, "step": 4607 }, { "epoch": 2.98, "learning_rate": 3.1525005258969953e-09, "loss": 0.3087, "step": 4608 }, { "epoch": 2.98, "learning_rate": 2.9798026704752625e-09, "loss": 0.2948, "step": 4609 }, { "epoch": 2.98, "learning_rate": 2.811968590699099e-09, "loss": 0.2985, "step": 4610 }, { "epoch": 2.98, "learning_rate": 2.6489983682220777e-09, "loss": 0.3115, "step": 4611 }, { "epoch": 2.98, "learning_rate": 2.490892082331886e-09, "loss": 0.2872, "step": 4612 }, { "epoch": 2.98, "learning_rate": 2.337649809952547e-09, "loss": 0.3042, "step": 4613 }, { "epoch": 2.98, "learning_rate": 2.1892716256388667e-09, "loss": 0.3027, "step": 4614 }, { "epoch": 2.98, "learning_rate": 2.0457576015808776e-09, "loss": 0.2753, "step": 4615 }, { "epoch": 2.98, "learning_rate": 1.907107807600506e-09, "loss": 0.307, "step": 4616 }, { "epoch": 2.98, "learning_rate": 1.7733223111549013e-09, "loss": 0.2737, "step": 4617 }, { "epoch": 2.98, "learning_rate": 1.6444011773331103e-09, "loss": 0.2841, "step": 4618 }, { "epoch": 2.98, "learning_rate": 1.5203444688582924e-09, "loss": 0.2995, "step": 4619 }, { "epoch": 2.98, "learning_rate": 1.4011522460866122e-09, "loss": 0.2846, "step": 4620 }, { "epoch": 2.98, "learning_rate": 1.2868245670083491e-09, "loss": 0.2838, "step": 4621 }, { "epoch": 2.98, "learning_rate": 1.1773614872467864e-09, "loss": 0.2742, "step": 4622 }, { "epoch": 2.98, "learning_rate": 1.0727630600571027e-09, "loss": 0.2911, "step": 4623 }, { "epoch": 2.99, "learning_rate": 9.730293363297006e-10, "loss": 0.3095, "step": 4624 }, { "epoch": 2.99, "learning_rate": 8.781603645857673e-10, "loss": 0.3049, "step": 4625 }, { "epoch": 2.99, "learning_rate": 7.881561909828251e-10, "loss": 0.2957, "step": 4626 }, { "epoch": 2.99, "learning_rate": 7.030168593102904e-10, "loss": 0.2931, "step": 4627 }, { "epoch": 2.99, "learning_rate": 6.227424109883639e-10, "loss": 0.3134, "step": 4628 }, { "epoch": 2.99, "learning_rate": 5.473328850735815e-10, "loss": 0.3078, "step": 4629 }, { "epoch": 2.99, "learning_rate": 4.767883182554834e-10, "loss": 0.3003, "step": 4630 }, { "epoch": 2.99, "learning_rate": 4.111087448532836e-10, "loss": 0.2714, "step": 4631 }, { "epoch": 2.99, "learning_rate": 3.502941968225315e-10, "loss": 0.2966, "step": 4632 }, { "epoch": 2.99, "learning_rate": 2.9434470375178105e-10, "loss": 0.3043, "step": 4633 }, { "epoch": 2.99, "learning_rate": 2.4326029286037003e-10, "loss": 0.296, "step": 4634 }, { "epoch": 2.99, "learning_rate": 1.970409890028613e-10, "loss": 0.3266, "step": 4635 }, { "epoch": 2.99, "learning_rate": 1.5568681466682223e-10, "loss": 0.2755, "step": 4636 }, { "epoch": 2.99, "learning_rate": 1.1919778996949406e-10, "loss": 0.3062, "step": 4637 }, { "epoch": 2.99, "learning_rate": 8.757393266556336e-11, "loss": 0.292, "step": 4638 }, { "epoch": 3.0, "learning_rate": 6.081525814050082e-11, "loss": 0.3055, "step": 4639 }, { "epoch": 3.0, "learning_rate": 3.8921779411671414e-11, "loss": 0.3234, "step": 4640 }, { "epoch": 3.0, "learning_rate": 2.189350713277527e-11, "loss": 0.2912, "step": 4641 }, { "epoch": 3.0, "learning_rate": 9.730449587186385e-12, "loss": 0.2979, "step": 4642 }, { "epoch": 3.0, "learning_rate": 2.432612692393477e-12, "loss": 0.2934, "step": 4643 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.3003, "step": 4644 }, { "epoch": 3.0, "step": 4644, "total_flos": 2.3174284043616256e+16, "train_loss": 0.4229431632299756, "train_runtime": 125794.9271, "train_samples_per_second": 4.728, "train_steps_per_second": 0.037 } ], "logging_steps": 1.0, "max_steps": 4644, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10000, "total_flos": 2.3174284043616256e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }