FiLM-SEC / trainer_state.json
HYdsl's picture
Upload 8 files
b3a182d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 881659,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.997175778844202e-05,
"loss": 1.3304,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.994345886561585e-05,
"loss": 1.3122,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.991510323152149e-05,
"loss": 1.2993,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 4.988680430869531e-05,
"loss": 1.2774,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 4.9858448674600954e-05,
"loss": 1.2626,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 4.983009304050659e-05,
"loss": 1.2627,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 4.980173740641223e-05,
"loss": 1.254,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 4.9773381772317873e-05,
"loss": 1.2599,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.974502613822351e-05,
"loss": 1.2455,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.971667050412915e-05,
"loss": 1.2563,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 4.968831487003479e-05,
"loss": 1.2451,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 4.966001594720862e-05,
"loss": 1.2504,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 4.963166031311426e-05,
"loss": 1.2343,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 4.9603304679019894e-05,
"loss": 1.2249,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 4.9574949044925536e-05,
"loss": 1.2318,
"step": 7500
},
{
"epoch": 0.01,
"learning_rate": 4.954659341083117e-05,
"loss": 1.2294,
"step": 8000
},
{
"epoch": 0.01,
"learning_rate": 4.951823777673681e-05,
"loss": 1.2279,
"step": 8500
},
{
"epoch": 0.01,
"learning_rate": 4.9489882142642455e-05,
"loss": 1.2212,
"step": 9000
},
{
"epoch": 0.01,
"learning_rate": 4.946158321981628e-05,
"loss": 1.2309,
"step": 9500
},
{
"epoch": 0.01,
"learning_rate": 4.943322758572192e-05,
"loss": 1.2208,
"step": 10000
},
{
"epoch": 0.01,
"learning_rate": 4.9404871951627556e-05,
"loss": 1.228,
"step": 10500
},
{
"epoch": 0.01,
"learning_rate": 4.937657302880139e-05,
"loss": 1.2176,
"step": 11000
},
{
"epoch": 0.01,
"learning_rate": 4.934821739470702e-05,
"loss": 1.2161,
"step": 11500
},
{
"epoch": 0.01,
"learning_rate": 4.931986176061267e-05,
"loss": 1.2172,
"step": 12000
},
{
"epoch": 0.01,
"learning_rate": 4.9291506126518306e-05,
"loss": 1.2119,
"step": 12500
},
{
"epoch": 0.01,
"learning_rate": 4.926315049242394e-05,
"loss": 1.2083,
"step": 13000
},
{
"epoch": 0.02,
"learning_rate": 4.9234794858329584e-05,
"loss": 1.2215,
"step": 13500
},
{
"epoch": 0.02,
"learning_rate": 4.9206439224235226e-05,
"loss": 1.2043,
"step": 14000
},
{
"epoch": 0.02,
"learning_rate": 4.917808359014086e-05,
"loss": 1.2027,
"step": 14500
},
{
"epoch": 0.02,
"learning_rate": 4.914978466731469e-05,
"loss": 1.21,
"step": 15000
},
{
"epoch": 0.02,
"learning_rate": 4.912142903322033e-05,
"loss": 1.2094,
"step": 15500
},
{
"epoch": 0.02,
"learning_rate": 4.909307339912597e-05,
"loss": 1.2119,
"step": 16000
},
{
"epoch": 0.02,
"learning_rate": 4.906471776503161e-05,
"loss": 1.193,
"step": 16500
},
{
"epoch": 0.02,
"learning_rate": 4.9036362130937246e-05,
"loss": 1.2033,
"step": 17000
},
{
"epoch": 0.02,
"learning_rate": 4.900806320811108e-05,
"loss": 1.206,
"step": 17500
},
{
"epoch": 0.02,
"learning_rate": 4.897970757401671e-05,
"loss": 1.2061,
"step": 18000
},
{
"epoch": 0.02,
"learning_rate": 4.8951351939922354e-05,
"loss": 1.193,
"step": 18500
},
{
"epoch": 0.02,
"learning_rate": 4.8922996305827996e-05,
"loss": 1.1924,
"step": 19000
},
{
"epoch": 0.02,
"learning_rate": 4.889464067173363e-05,
"loss": 1.1871,
"step": 19500
},
{
"epoch": 0.02,
"learning_rate": 4.8866285037639273e-05,
"loss": 1.1883,
"step": 20000
},
{
"epoch": 0.02,
"learning_rate": 4.883792940354491e-05,
"loss": 1.1917,
"step": 20500
},
{
"epoch": 0.02,
"learning_rate": 4.880957376945055e-05,
"loss": 1.1936,
"step": 21000
},
{
"epoch": 0.02,
"learning_rate": 4.8781218135356186e-05,
"loss": 1.1904,
"step": 21500
},
{
"epoch": 0.02,
"learning_rate": 4.875291921253002e-05,
"loss": 1.1855,
"step": 22000
},
{
"epoch": 0.03,
"learning_rate": 4.872456357843566e-05,
"loss": 1.1826,
"step": 22500
},
{
"epoch": 0.03,
"learning_rate": 4.8696207944341294e-05,
"loss": 1.1903,
"step": 23000
},
{
"epoch": 0.03,
"learning_rate": 4.8667852310246936e-05,
"loss": 1.1751,
"step": 23500
},
{
"epoch": 0.03,
"learning_rate": 4.863955338742076e-05,
"loss": 1.1854,
"step": 24000
},
{
"epoch": 0.03,
"learning_rate": 4.861125446459459e-05,
"loss": 1.1777,
"step": 24500
},
{
"epoch": 0.03,
"learning_rate": 4.8582898830500226e-05,
"loss": 1.1808,
"step": 25000
},
{
"epoch": 0.03,
"learning_rate": 4.8554543196405875e-05,
"loss": 1.1903,
"step": 25500
},
{
"epoch": 0.03,
"learning_rate": 4.852618756231151e-05,
"loss": 1.1777,
"step": 26000
},
{
"epoch": 0.03,
"learning_rate": 4.8497831928217145e-05,
"loss": 1.1816,
"step": 26500
},
{
"epoch": 0.03,
"learning_rate": 4.846947629412279e-05,
"loss": 1.1829,
"step": 27000
},
{
"epoch": 0.03,
"learning_rate": 4.844117737129661e-05,
"loss": 1.1851,
"step": 27500
},
{
"epoch": 0.03,
"learning_rate": 4.841282173720225e-05,
"loss": 1.1809,
"step": 28000
},
{
"epoch": 0.03,
"learning_rate": 4.8384466103107895e-05,
"loss": 1.1794,
"step": 28500
},
{
"epoch": 0.03,
"learning_rate": 4.835616718028172e-05,
"loss": 1.1758,
"step": 29000
},
{
"epoch": 0.03,
"learning_rate": 4.832781154618736e-05,
"loss": 1.1778,
"step": 29500
},
{
"epoch": 0.03,
"learning_rate": 4.8299455912093e-05,
"loss": 1.165,
"step": 30000
},
{
"epoch": 0.03,
"learning_rate": 4.827110027799864e-05,
"loss": 1.1737,
"step": 30500
},
{
"epoch": 0.04,
"learning_rate": 4.824274464390428e-05,
"loss": 1.1753,
"step": 31000
},
{
"epoch": 0.04,
"learning_rate": 4.8214389009809916e-05,
"loss": 1.1745,
"step": 31500
},
{
"epoch": 0.04,
"learning_rate": 4.818603337571556e-05,
"loss": 1.1764,
"step": 32000
},
{
"epoch": 0.04,
"learning_rate": 4.815767774162119e-05,
"loss": 1.1638,
"step": 32500
},
{
"epoch": 0.04,
"learning_rate": 4.8129322107526835e-05,
"loss": 1.1753,
"step": 33000
},
{
"epoch": 0.04,
"learning_rate": 4.810102318470066e-05,
"loss": 1.1726,
"step": 33500
},
{
"epoch": 0.04,
"learning_rate": 4.80726675506063e-05,
"loss": 1.1659,
"step": 34000
},
{
"epoch": 0.04,
"learning_rate": 4.804442533904832e-05,
"loss": 1.1625,
"step": 34500
},
{
"epoch": 0.04,
"learning_rate": 4.8016069704953956e-05,
"loss": 1.1626,
"step": 35000
},
{
"epoch": 0.04,
"learning_rate": 4.79877140708596e-05,
"loss": 1.1735,
"step": 35500
},
{
"epoch": 0.04,
"learning_rate": 4.795935843676524e-05,
"loss": 1.1706,
"step": 36000
},
{
"epoch": 0.04,
"learning_rate": 4.7931002802670875e-05,
"loss": 1.1638,
"step": 36500
},
{
"epoch": 0.04,
"learning_rate": 4.790264716857652e-05,
"loss": 1.1604,
"step": 37000
},
{
"epoch": 0.04,
"learning_rate": 4.787429153448216e-05,
"loss": 1.1753,
"step": 37500
},
{
"epoch": 0.04,
"learning_rate": 4.7845935900387794e-05,
"loss": 1.1571,
"step": 38000
},
{
"epoch": 0.04,
"learning_rate": 4.781758026629343e-05,
"loss": 1.1588,
"step": 38500
},
{
"epoch": 0.04,
"learning_rate": 4.778922463219907e-05,
"loss": 1.1719,
"step": 39000
},
{
"epoch": 0.04,
"learning_rate": 4.7760868998104714e-05,
"loss": 1.1604,
"step": 39500
},
{
"epoch": 0.05,
"learning_rate": 4.773251336401035e-05,
"loss": 1.1631,
"step": 40000
},
{
"epoch": 0.05,
"learning_rate": 4.770415772991599e-05,
"loss": 1.148,
"step": 40500
},
{
"epoch": 0.05,
"learning_rate": 4.767580209582163e-05,
"loss": 1.1462,
"step": 41000
},
{
"epoch": 0.05,
"learning_rate": 4.764750317299546e-05,
"loss": 1.1637,
"step": 41500
},
{
"epoch": 0.05,
"learning_rate": 4.76191475389011e-05,
"loss": 1.1625,
"step": 42000
},
{
"epoch": 0.05,
"learning_rate": 4.7590791904806734e-05,
"loss": 1.1514,
"step": 42500
},
{
"epoch": 0.05,
"learning_rate": 4.7562436270712376e-05,
"loss": 1.1566,
"step": 43000
},
{
"epoch": 0.05,
"learning_rate": 4.753408063661802e-05,
"loss": 1.1626,
"step": 43500
},
{
"epoch": 0.05,
"learning_rate": 4.750578171379184e-05,
"loss": 1.1559,
"step": 44000
},
{
"epoch": 0.05,
"learning_rate": 4.747742607969748e-05,
"loss": 1.1504,
"step": 44500
},
{
"epoch": 0.05,
"learning_rate": 4.744907044560312e-05,
"loss": 1.1485,
"step": 45000
},
{
"epoch": 0.05,
"learning_rate": 4.742071481150876e-05,
"loss": 1.1574,
"step": 45500
},
{
"epoch": 0.05,
"learning_rate": 4.7392359177414397e-05,
"loss": 1.1498,
"step": 46000
},
{
"epoch": 0.05,
"learning_rate": 4.736400354332004e-05,
"loss": 1.1593,
"step": 46500
},
{
"epoch": 0.05,
"learning_rate": 4.733564790922568e-05,
"loss": 1.149,
"step": 47000
},
{
"epoch": 0.05,
"learning_rate": 4.7307348986399505e-05,
"loss": 1.1522,
"step": 47500
},
{
"epoch": 0.05,
"learning_rate": 4.7278993352305147e-05,
"loss": 1.1449,
"step": 48000
},
{
"epoch": 0.06,
"learning_rate": 4.725063771821078e-05,
"loss": 1.1574,
"step": 48500
},
{
"epoch": 0.06,
"learning_rate": 4.7222282084116424e-05,
"loss": 1.1529,
"step": 49000
},
{
"epoch": 0.06,
"learning_rate": 4.7193926450022066e-05,
"loss": 1.1524,
"step": 49500
},
{
"epoch": 0.06,
"learning_rate": 4.71655708159277e-05,
"loss": 1.1406,
"step": 50000
},
{
"epoch": 0.06,
"learning_rate": 4.713721518183334e-05,
"loss": 1.1452,
"step": 50500
},
{
"epoch": 0.06,
"learning_rate": 4.710891625900717e-05,
"loss": 1.1443,
"step": 51000
},
{
"epoch": 0.06,
"learning_rate": 4.708056062491281e-05,
"loss": 1.1378,
"step": 51500
},
{
"epoch": 0.06,
"learning_rate": 4.705220499081845e-05,
"loss": 1.1504,
"step": 52000
},
{
"epoch": 0.06,
"learning_rate": 4.7023849356724086e-05,
"loss": 1.1519,
"step": 52500
},
{
"epoch": 0.06,
"learning_rate": 4.699549372262972e-05,
"loss": 1.14,
"step": 53000
},
{
"epoch": 0.06,
"learning_rate": 4.696719479980355e-05,
"loss": 1.1465,
"step": 53500
},
{
"epoch": 0.06,
"learning_rate": 4.6938839165709194e-05,
"loss": 1.1483,
"step": 54000
},
{
"epoch": 0.06,
"learning_rate": 4.6910483531614836e-05,
"loss": 1.1561,
"step": 54500
},
{
"epoch": 0.06,
"learning_rate": 4.688212789752047e-05,
"loss": 1.1454,
"step": 55000
},
{
"epoch": 0.06,
"learning_rate": 4.685377226342611e-05,
"loss": 1.1399,
"step": 55500
},
{
"epoch": 0.06,
"learning_rate": 4.6825416629331756e-05,
"loss": 1.1418,
"step": 56000
},
{
"epoch": 0.06,
"learning_rate": 4.679711770650558e-05,
"loss": 1.1524,
"step": 56500
},
{
"epoch": 0.06,
"learning_rate": 4.6768762072411215e-05,
"loss": 1.1433,
"step": 57000
},
{
"epoch": 0.07,
"learning_rate": 4.674040643831686e-05,
"loss": 1.1457,
"step": 57500
},
{
"epoch": 0.07,
"learning_rate": 4.67120508042225e-05,
"loss": 1.1497,
"step": 58000
},
{
"epoch": 0.07,
"learning_rate": 4.668375188139632e-05,
"loss": 1.1427,
"step": 58500
},
{
"epoch": 0.07,
"learning_rate": 4.6655396247301965e-05,
"loss": 1.14,
"step": 59000
},
{
"epoch": 0.07,
"learning_rate": 4.66270406132076e-05,
"loss": 1.1461,
"step": 59500
},
{
"epoch": 0.07,
"learning_rate": 4.659868497911324e-05,
"loss": 1.1371,
"step": 60000
},
{
"epoch": 0.07,
"learning_rate": 4.6570386056287066e-05,
"loss": 1.137,
"step": 60500
},
{
"epoch": 0.07,
"learning_rate": 4.6542030422192715e-05,
"loss": 1.141,
"step": 61000
},
{
"epoch": 0.07,
"learning_rate": 4.651367478809835e-05,
"loss": 1.1325,
"step": 61500
},
{
"epoch": 0.07,
"learning_rate": 4.648537586527218e-05,
"loss": 1.1385,
"step": 62000
},
{
"epoch": 0.07,
"learning_rate": 4.6457020231177816e-05,
"loss": 1.1444,
"step": 62500
},
{
"epoch": 0.07,
"learning_rate": 4.642866459708345e-05,
"loss": 1.141,
"step": 63000
},
{
"epoch": 0.07,
"learning_rate": 4.6400308962989093e-05,
"loss": 1.1354,
"step": 63500
},
{
"epoch": 0.07,
"learning_rate": 4.6371953328894735e-05,
"loss": 1.1478,
"step": 64000
},
{
"epoch": 0.07,
"learning_rate": 4.634359769480037e-05,
"loss": 1.1372,
"step": 64500
},
{
"epoch": 0.07,
"learning_rate": 4.631524206070601e-05,
"loss": 1.1286,
"step": 65000
},
{
"epoch": 0.07,
"learning_rate": 4.6286943137879843e-05,
"loss": 1.1348,
"step": 65500
},
{
"epoch": 0.07,
"learning_rate": 4.625858750378548e-05,
"loss": 1.1296,
"step": 66000
},
{
"epoch": 0.08,
"learning_rate": 4.623028858095931e-05,
"loss": 1.1325,
"step": 66500
},
{
"epoch": 0.08,
"learning_rate": 4.6201932946864945e-05,
"loss": 1.1338,
"step": 67000
},
{
"epoch": 0.08,
"learning_rate": 4.617357731277059e-05,
"loss": 1.132,
"step": 67500
},
{
"epoch": 0.08,
"learning_rate": 4.614522167867623e-05,
"loss": 1.1414,
"step": 68000
},
{
"epoch": 0.08,
"learning_rate": 4.6116866044581864e-05,
"loss": 1.1266,
"step": 68500
},
{
"epoch": 0.08,
"learning_rate": 4.60885104104875e-05,
"loss": 1.1294,
"step": 69000
},
{
"epoch": 0.08,
"learning_rate": 4.606015477639314e-05,
"loss": 1.1278,
"step": 69500
},
{
"epoch": 0.08,
"learning_rate": 4.6031855853566965e-05,
"loss": 1.1262,
"step": 70000
},
{
"epoch": 0.08,
"learning_rate": 4.6003500219472614e-05,
"loss": 1.1395,
"step": 70500
},
{
"epoch": 0.08,
"learning_rate": 4.597514458537825e-05,
"loss": 1.1315,
"step": 71000
},
{
"epoch": 0.08,
"learning_rate": 4.5946788951283884e-05,
"loss": 1.1319,
"step": 71500
},
{
"epoch": 0.08,
"learning_rate": 4.5918433317189526e-05,
"loss": 1.1272,
"step": 72000
},
{
"epoch": 0.08,
"learning_rate": 4.589007768309517e-05,
"loss": 1.1259,
"step": 72500
},
{
"epoch": 0.08,
"learning_rate": 4.5861722049000804e-05,
"loss": 1.1353,
"step": 73000
},
{
"epoch": 0.08,
"learning_rate": 4.5833366414906446e-05,
"loss": 1.1257,
"step": 73500
},
{
"epoch": 0.08,
"learning_rate": 4.580501078081209e-05,
"loss": 1.128,
"step": 74000
},
{
"epoch": 0.08,
"learning_rate": 4.577665514671772e-05,
"loss": 1.1318,
"step": 74500
},
{
"epoch": 0.09,
"learning_rate": 4.5748356223891554e-05,
"loss": 1.1273,
"step": 75000
},
{
"epoch": 0.09,
"learning_rate": 4.572000058979719e-05,
"loss": 1.1279,
"step": 75500
},
{
"epoch": 0.09,
"learning_rate": 4.569170166697102e-05,
"loss": 1.1378,
"step": 76000
},
{
"epoch": 0.09,
"learning_rate": 4.5663346032876655e-05,
"loss": 1.1314,
"step": 76500
},
{
"epoch": 0.09,
"learning_rate": 4.56349903987823e-05,
"loss": 1.1322,
"step": 77000
},
{
"epoch": 0.09,
"learning_rate": 4.560663476468794e-05,
"loss": 1.1253,
"step": 77500
},
{
"epoch": 0.09,
"learning_rate": 4.5578279130593574e-05,
"loss": 1.1267,
"step": 78000
},
{
"epoch": 0.09,
"learning_rate": 4.5549923496499216e-05,
"loss": 1.1214,
"step": 78500
},
{
"epoch": 0.09,
"learning_rate": 4.552156786240486e-05,
"loss": 1.1325,
"step": 79000
},
{
"epoch": 0.09,
"learning_rate": 4.5493212228310493e-05,
"loss": 1.1245,
"step": 79500
},
{
"epoch": 0.09,
"learning_rate": 4.546485659421613e-05,
"loss": 1.1287,
"step": 80000
},
{
"epoch": 0.09,
"learning_rate": 4.543650096012178e-05,
"loss": 1.1274,
"step": 80500
},
{
"epoch": 0.09,
"learning_rate": 4.540814532602741e-05,
"loss": 1.1321,
"step": 81000
},
{
"epoch": 0.09,
"learning_rate": 4.537978969193305e-05,
"loss": 1.1246,
"step": 81500
},
{
"epoch": 0.09,
"learning_rate": 4.535143405783869e-05,
"loss": 1.1176,
"step": 82000
},
{
"epoch": 0.09,
"learning_rate": 4.532313513501252e-05,
"loss": 1.1291,
"step": 82500
},
{
"epoch": 0.09,
"learning_rate": 4.5294779500918156e-05,
"loss": 1.1186,
"step": 83000
},
{
"epoch": 0.09,
"learning_rate": 4.52664238668238e-05,
"loss": 1.1247,
"step": 83500
},
{
"epoch": 0.1,
"learning_rate": 4.523812494399762e-05,
"loss": 1.1236,
"step": 84000
},
{
"epoch": 0.1,
"learning_rate": 4.5209769309903264e-05,
"loss": 1.1217,
"step": 84500
},
{
"epoch": 0.1,
"learning_rate": 4.5181413675808906e-05,
"loss": 1.1094,
"step": 85000
},
{
"epoch": 0.1,
"learning_rate": 4.515305804171454e-05,
"loss": 1.1224,
"step": 85500
},
{
"epoch": 0.1,
"learning_rate": 4.512470240762018e-05,
"loss": 1.1217,
"step": 86000
},
{
"epoch": 0.1,
"learning_rate": 4.509634677352582e-05,
"loss": 1.1242,
"step": 86500
},
{
"epoch": 0.1,
"learning_rate": 4.506799113943146e-05,
"loss": 1.119,
"step": 87000
},
{
"epoch": 0.1,
"learning_rate": 4.50396355053371e-05,
"loss": 1.1181,
"step": 87500
},
{
"epoch": 0.1,
"learning_rate": 4.501127987124274e-05,
"loss": 1.1154,
"step": 88000
},
{
"epoch": 0.1,
"learning_rate": 4.498292423714838e-05,
"loss": 1.1226,
"step": 88500
},
{
"epoch": 0.1,
"learning_rate": 4.4954568603054015e-05,
"loss": 1.1146,
"step": 89000
},
{
"epoch": 0.1,
"learning_rate": 4.492621296895966e-05,
"loss": 1.1202,
"step": 89500
},
{
"epoch": 0.1,
"learning_rate": 4.489785733486529e-05,
"loss": 1.0994,
"step": 90000
},
{
"epoch": 0.1,
"learning_rate": 4.486955841203912e-05,
"loss": 1.1205,
"step": 90500
},
{
"epoch": 0.1,
"learning_rate": 4.4841202777944765e-05,
"loss": 1.121,
"step": 91000
},
{
"epoch": 0.1,
"learning_rate": 4.48128471438504e-05,
"loss": 1.1109,
"step": 91500
},
{
"epoch": 0.1,
"learning_rate": 4.478449150975604e-05,
"loss": 1.1127,
"step": 92000
},
{
"epoch": 0.1,
"learning_rate": 4.4756135875661684e-05,
"loss": 1.1177,
"step": 92500
},
{
"epoch": 0.11,
"learning_rate": 4.472778024156732e-05,
"loss": 1.1124,
"step": 93000
},
{
"epoch": 0.11,
"learning_rate": 4.469948131874115e-05,
"loss": 1.1209,
"step": 93500
},
{
"epoch": 0.11,
"learning_rate": 4.4671125684646785e-05,
"loss": 1.1206,
"step": 94000
},
{
"epoch": 0.11,
"learning_rate": 4.464277005055243e-05,
"loss": 1.1107,
"step": 94500
},
{
"epoch": 0.11,
"learning_rate": 4.461441441645807e-05,
"loss": 1.1137,
"step": 95000
},
{
"epoch": 0.11,
"learning_rate": 4.4586058782363705e-05,
"loss": 1.1213,
"step": 95500
},
{
"epoch": 0.11,
"learning_rate": 4.4557759859537535e-05,
"loss": 1.1106,
"step": 96000
},
{
"epoch": 0.11,
"learning_rate": 4.452940422544317e-05,
"loss": 1.1123,
"step": 96500
},
{
"epoch": 0.11,
"learning_rate": 4.450104859134881e-05,
"loss": 1.1174,
"step": 97000
},
{
"epoch": 0.11,
"learning_rate": 4.447274966852264e-05,
"loss": 1.1023,
"step": 97500
},
{
"epoch": 0.11,
"learning_rate": 4.444445074569647e-05,
"loss": 1.1074,
"step": 98000
},
{
"epoch": 0.11,
"learning_rate": 4.441609511160211e-05,
"loss": 1.1233,
"step": 98500
},
{
"epoch": 0.11,
"learning_rate": 4.4387739477507745e-05,
"loss": 1.119,
"step": 99000
},
{
"epoch": 0.11,
"learning_rate": 4.435938384341339e-05,
"loss": 1.1247,
"step": 99500
},
{
"epoch": 0.11,
"learning_rate": 4.433108492058721e-05,
"loss": 1.1054,
"step": 100000
},
{
"epoch": 0.11,
"learning_rate": 4.430272928649285e-05,
"loss": 1.1037,
"step": 100500
},
{
"epoch": 0.11,
"learning_rate": 4.4274373652398495e-05,
"loss": 1.1209,
"step": 101000
},
{
"epoch": 0.12,
"learning_rate": 4.424601801830413e-05,
"loss": 1.1118,
"step": 101500
},
{
"epoch": 0.12,
"learning_rate": 4.4217662384209765e-05,
"loss": 1.1099,
"step": 102000
},
{
"epoch": 0.12,
"learning_rate": 4.4189306750115414e-05,
"loss": 1.1069,
"step": 102500
},
{
"epoch": 0.12,
"learning_rate": 4.416095111602105e-05,
"loss": 1.1166,
"step": 103000
},
{
"epoch": 0.12,
"learning_rate": 4.4132595481926684e-05,
"loss": 1.1263,
"step": 103500
},
{
"epoch": 0.12,
"learning_rate": 4.4104239847832326e-05,
"loss": 1.1107,
"step": 104000
},
{
"epoch": 0.12,
"learning_rate": 4.407588421373797e-05,
"loss": 1.1145,
"step": 104500
},
{
"epoch": 0.12,
"learning_rate": 4.40475852909118e-05,
"loss": 1.1109,
"step": 105000
},
{
"epoch": 0.12,
"learning_rate": 4.4019229656817435e-05,
"loss": 1.1037,
"step": 105500
},
{
"epoch": 0.12,
"learning_rate": 4.399087402272307e-05,
"loss": 1.1023,
"step": 106000
},
{
"epoch": 0.12,
"learning_rate": 4.396251838862871e-05,
"loss": 1.1152,
"step": 106500
},
{
"epoch": 0.12,
"learning_rate": 4.3934162754534354e-05,
"loss": 1.1259,
"step": 107000
},
{
"epoch": 0.12,
"learning_rate": 4.390580712043999e-05,
"loss": 1.1106,
"step": 107500
},
{
"epoch": 0.12,
"learning_rate": 4.387745148634563e-05,
"loss": 1.1024,
"step": 108000
},
{
"epoch": 0.12,
"learning_rate": 4.3849152563519455e-05,
"loss": 1.1107,
"step": 108500
},
{
"epoch": 0.12,
"learning_rate": 4.38207969294251e-05,
"loss": 1.1101,
"step": 109000
},
{
"epoch": 0.12,
"learning_rate": 4.379244129533074e-05,
"loss": 1.1149,
"step": 109500
},
{
"epoch": 0.12,
"learning_rate": 4.3764085661236374e-05,
"loss": 1.1109,
"step": 110000
},
{
"epoch": 0.13,
"learning_rate": 4.3735786738410205e-05,
"loss": 1.1148,
"step": 110500
},
{
"epoch": 0.13,
"learning_rate": 4.370748781558403e-05,
"loss": 1.1078,
"step": 111000
},
{
"epoch": 0.13,
"learning_rate": 4.367913218148967e-05,
"loss": 1.1108,
"step": 111500
},
{
"epoch": 0.13,
"learning_rate": 4.365077654739531e-05,
"loss": 1.1025,
"step": 112000
},
{
"epoch": 0.13,
"learning_rate": 4.362247762456914e-05,
"loss": 1.1018,
"step": 112500
},
{
"epoch": 0.13,
"learning_rate": 4.359412199047478e-05,
"loss": 1.1211,
"step": 113000
},
{
"epoch": 0.13,
"learning_rate": 4.3565766356380414e-05,
"loss": 1.1098,
"step": 113500
},
{
"epoch": 0.13,
"learning_rate": 4.3537410722286056e-05,
"loss": 1.1044,
"step": 114000
},
{
"epoch": 0.13,
"learning_rate": 4.35090550881917e-05,
"loss": 1.103,
"step": 114500
},
{
"epoch": 0.13,
"learning_rate": 4.3480699454097334e-05,
"loss": 1.1096,
"step": 115000
},
{
"epoch": 0.13,
"learning_rate": 4.345234382000297e-05,
"loss": 1.103,
"step": 115500
},
{
"epoch": 0.13,
"learning_rate": 4.342398818590862e-05,
"loss": 1.1027,
"step": 116000
},
{
"epoch": 0.13,
"learning_rate": 4.339563255181425e-05,
"loss": 1.1022,
"step": 116500
},
{
"epoch": 0.13,
"learning_rate": 4.336727691771989e-05,
"loss": 1.1095,
"step": 117000
},
{
"epoch": 0.13,
"learning_rate": 4.333892128362553e-05,
"loss": 1.0986,
"step": 117500
},
{
"epoch": 0.13,
"learning_rate": 4.331056564953117e-05,
"loss": 1.0942,
"step": 118000
},
{
"epoch": 0.13,
"learning_rate": 4.328221001543681e-05,
"loss": 1.1055,
"step": 118500
},
{
"epoch": 0.13,
"learning_rate": 4.325385438134245e-05,
"loss": 1.1031,
"step": 119000
},
{
"epoch": 0.14,
"learning_rate": 4.322555545851627e-05,
"loss": 1.1006,
"step": 119500
},
{
"epoch": 0.14,
"learning_rate": 4.3197199824421915e-05,
"loss": 1.1081,
"step": 120000
},
{
"epoch": 0.14,
"learning_rate": 4.316884419032756e-05,
"loss": 1.0948,
"step": 120500
},
{
"epoch": 0.14,
"learning_rate": 4.314048855623319e-05,
"loss": 1.0963,
"step": 121000
},
{
"epoch": 0.14,
"learning_rate": 4.311218963340702e-05,
"loss": 1.1024,
"step": 121500
},
{
"epoch": 0.14,
"learning_rate": 4.308383399931266e-05,
"loss": 1.1126,
"step": 122000
},
{
"epoch": 0.14,
"learning_rate": 4.30554783652183e-05,
"loss": 1.0998,
"step": 122500
},
{
"epoch": 0.14,
"learning_rate": 4.302712273112394e-05,
"loss": 1.0975,
"step": 123000
},
{
"epoch": 0.14,
"learning_rate": 4.299876709702958e-05,
"loss": 1.098,
"step": 123500
},
{
"epoch": 0.14,
"learning_rate": 4.297041146293522e-05,
"loss": 1.0997,
"step": 124000
},
{
"epoch": 0.14,
"learning_rate": 4.294205582884086e-05,
"loss": 1.1036,
"step": 124500
},
{
"epoch": 0.14,
"learning_rate": 4.2913756906014686e-05,
"loss": 1.1071,
"step": 125000
},
{
"epoch": 0.14,
"learning_rate": 4.288545798318852e-05,
"loss": 1.0956,
"step": 125500
},
{
"epoch": 0.14,
"learning_rate": 4.285710234909415e-05,
"loss": 1.0978,
"step": 126000
},
{
"epoch": 0.14,
"learning_rate": 4.282880342626798e-05,
"loss": 1.1087,
"step": 126500
},
{
"epoch": 0.14,
"learning_rate": 4.280044779217362e-05,
"loss": 1.094,
"step": 127000
},
{
"epoch": 0.14,
"learning_rate": 4.277209215807926e-05,
"loss": 1.0898,
"step": 127500
},
{
"epoch": 0.15,
"learning_rate": 4.27437365239849e-05,
"loss": 1.0999,
"step": 128000
},
{
"epoch": 0.15,
"learning_rate": 4.271538088989054e-05,
"loss": 1.1047,
"step": 128500
},
{
"epoch": 0.15,
"learning_rate": 4.268702525579618e-05,
"loss": 1.0952,
"step": 129000
},
{
"epoch": 0.15,
"learning_rate": 4.265866962170182e-05,
"loss": 1.0901,
"step": 129500
},
{
"epoch": 0.15,
"learning_rate": 4.2630313987607456e-05,
"loss": 1.1066,
"step": 130000
},
{
"epoch": 0.15,
"learning_rate": 4.260201506478128e-05,
"loss": 1.1087,
"step": 130500
},
{
"epoch": 0.15,
"learning_rate": 4.257371614195511e-05,
"loss": 1.097,
"step": 131000
},
{
"epoch": 0.15,
"learning_rate": 4.2545360507860747e-05,
"loss": 1.1003,
"step": 131500
},
{
"epoch": 0.15,
"learning_rate": 4.251700487376639e-05,
"loss": 1.0966,
"step": 132000
},
{
"epoch": 0.15,
"learning_rate": 4.248864923967203e-05,
"loss": 1.0973,
"step": 132500
},
{
"epoch": 0.15,
"learning_rate": 4.2460293605577666e-05,
"loss": 1.0905,
"step": 133000
},
{
"epoch": 0.15,
"learning_rate": 4.243193797148331e-05,
"loss": 1.1033,
"step": 133500
},
{
"epoch": 0.15,
"learning_rate": 4.240358233738895e-05,
"loss": 1.0993,
"step": 134000
},
{
"epoch": 0.15,
"learning_rate": 4.2375226703294585e-05,
"loss": 1.1042,
"step": 134500
},
{
"epoch": 0.15,
"learning_rate": 4.234687106920023e-05,
"loss": 1.1049,
"step": 135000
},
{
"epoch": 0.15,
"learning_rate": 4.231851543510586e-05,
"loss": 1.1044,
"step": 135500
},
{
"epoch": 0.15,
"learning_rate": 4.229021651227969e-05,
"loss": 1.1066,
"step": 136000
},
{
"epoch": 0.15,
"learning_rate": 4.2261860878185335e-05,
"loss": 1.0993,
"step": 136500
},
{
"epoch": 0.16,
"learning_rate": 4.223350524409097e-05,
"loss": 1.0956,
"step": 137000
},
{
"epoch": 0.16,
"learning_rate": 4.2205149609996605e-05,
"loss": 1.0968,
"step": 137500
},
{
"epoch": 0.16,
"learning_rate": 4.2176793975902254e-05,
"loss": 1.0987,
"step": 138000
},
{
"epoch": 0.16,
"learning_rate": 4.214849505307608e-05,
"loss": 1.0942,
"step": 138500
},
{
"epoch": 0.16,
"learning_rate": 4.212013941898172e-05,
"loss": 1.0911,
"step": 139000
},
{
"epoch": 0.16,
"learning_rate": 4.2091783784887355e-05,
"loss": 1.0987,
"step": 139500
},
{
"epoch": 0.16,
"learning_rate": 4.206342815079299e-05,
"loss": 1.0996,
"step": 140000
},
{
"epoch": 0.16,
"learning_rate": 4.203507251669864e-05,
"loss": 1.0917,
"step": 140500
},
{
"epoch": 0.16,
"learning_rate": 4.2006716882604275e-05,
"loss": 1.0973,
"step": 141000
},
{
"epoch": 0.16,
"learning_rate": 4.197836124850991e-05,
"loss": 1.1076,
"step": 141500
},
{
"epoch": 0.16,
"learning_rate": 4.195000561441555e-05,
"loss": 1.0885,
"step": 142000
},
{
"epoch": 0.16,
"learning_rate": 4.1921649980321194e-05,
"loss": 1.0938,
"step": 142500
},
{
"epoch": 0.16,
"learning_rate": 4.189329434622683e-05,
"loss": 1.0964,
"step": 143000
},
{
"epoch": 0.16,
"learning_rate": 4.186493871213247e-05,
"loss": 1.0999,
"step": 143500
},
{
"epoch": 0.16,
"learning_rate": 4.183658307803811e-05,
"loss": 1.0868,
"step": 144000
},
{
"epoch": 0.16,
"learning_rate": 4.180822744394375e-05,
"loss": 1.0868,
"step": 144500
},
{
"epoch": 0.16,
"learning_rate": 4.1779871809849384e-05,
"loss": 1.0857,
"step": 145000
},
{
"epoch": 0.17,
"learning_rate": 4.1751572887023214e-05,
"loss": 1.0877,
"step": 145500
},
{
"epoch": 0.17,
"learning_rate": 4.1723217252928856e-05,
"loss": 1.096,
"step": 146000
},
{
"epoch": 0.17,
"learning_rate": 4.169491833010268e-05,
"loss": 1.0915,
"step": 146500
},
{
"epoch": 0.17,
"learning_rate": 4.166656269600832e-05,
"loss": 1.0894,
"step": 147000
},
{
"epoch": 0.17,
"learning_rate": 4.1638207061913964e-05,
"loss": 1.0959,
"step": 147500
},
{
"epoch": 0.17,
"learning_rate": 4.16098514278196e-05,
"loss": 1.0915,
"step": 148000
},
{
"epoch": 0.17,
"learning_rate": 4.158155250499343e-05,
"loss": 1.0851,
"step": 148500
},
{
"epoch": 0.17,
"learning_rate": 4.155319687089907e-05,
"loss": 1.0984,
"step": 149000
},
{
"epoch": 0.17,
"learning_rate": 4.152484123680471e-05,
"loss": 1.0902,
"step": 149500
},
{
"epoch": 0.17,
"learning_rate": 4.149654231397854e-05,
"loss": 1.0895,
"step": 150000
},
{
"epoch": 0.17,
"learning_rate": 4.1468186679884174e-05,
"loss": 1.0867,
"step": 150500
},
{
"epoch": 0.17,
"learning_rate": 4.143983104578981e-05,
"loss": 1.0924,
"step": 151000
},
{
"epoch": 0.17,
"learning_rate": 4.141147541169546e-05,
"loss": 1.0795,
"step": 151500
},
{
"epoch": 0.17,
"learning_rate": 4.138311977760109e-05,
"loss": 1.0907,
"step": 152000
},
{
"epoch": 0.17,
"learning_rate": 4.135476414350673e-05,
"loss": 1.0914,
"step": 152500
},
{
"epoch": 0.17,
"learning_rate": 4.132646522068056e-05,
"loss": 1.0896,
"step": 153000
},
{
"epoch": 0.17,
"learning_rate": 4.12981095865862e-05,
"loss": 1.0801,
"step": 153500
},
{
"epoch": 0.17,
"learning_rate": 4.126975395249184e-05,
"loss": 1.0953,
"step": 154000
},
{
"epoch": 0.18,
"learning_rate": 4.124139831839748e-05,
"loss": 1.0908,
"step": 154500
},
{
"epoch": 0.18,
"learning_rate": 4.12130993955713e-05,
"loss": 1.0882,
"step": 155000
},
{
"epoch": 0.18,
"learning_rate": 4.1184743761476944e-05,
"loss": 1.0917,
"step": 155500
},
{
"epoch": 0.18,
"learning_rate": 4.1156388127382586e-05,
"loss": 1.0837,
"step": 156000
},
{
"epoch": 0.18,
"learning_rate": 4.112803249328822e-05,
"loss": 1.0893,
"step": 156500
},
{
"epoch": 0.18,
"learning_rate": 4.1099676859193864e-05,
"loss": 1.099,
"step": 157000
},
{
"epoch": 0.18,
"learning_rate": 4.10713212250995e-05,
"loss": 1.0879,
"step": 157500
},
{
"epoch": 0.18,
"learning_rate": 4.104296559100514e-05,
"loss": 1.0878,
"step": 158000
},
{
"epoch": 0.18,
"learning_rate": 4.101460995691078e-05,
"loss": 1.0789,
"step": 158500
},
{
"epoch": 0.18,
"learning_rate": 4.098625432281642e-05,
"loss": 1.0858,
"step": 159000
},
{
"epoch": 0.18,
"learning_rate": 4.095789868872206e-05,
"loss": 1.0892,
"step": 159500
},
{
"epoch": 0.18,
"learning_rate": 4.09295430546277e-05,
"loss": 1.0869,
"step": 160000
},
{
"epoch": 0.18,
"learning_rate": 4.0901244131801526e-05,
"loss": 1.0855,
"step": 160500
},
{
"epoch": 0.18,
"learning_rate": 4.087288849770717e-05,
"loss": 1.0937,
"step": 161000
},
{
"epoch": 0.18,
"learning_rate": 4.084458957488099e-05,
"loss": 1.0903,
"step": 161500
},
{
"epoch": 0.18,
"learning_rate": 4.081623394078663e-05,
"loss": 1.0806,
"step": 162000
},
{
"epoch": 0.18,
"learning_rate": 4.0787878306692276e-05,
"loss": 1.0861,
"step": 162500
},
{
"epoch": 0.18,
"learning_rate": 4.075952267259791e-05,
"loss": 1.0807,
"step": 163000
},
{
"epoch": 0.19,
"learning_rate": 4.0731167038503547e-05,
"loss": 1.0914,
"step": 163500
},
{
"epoch": 0.19,
"learning_rate": 4.070281140440919e-05,
"loss": 1.0872,
"step": 164000
},
{
"epoch": 0.19,
"learning_rate": 4.067451248158301e-05,
"loss": 1.0803,
"step": 164500
},
{
"epoch": 0.19,
"learning_rate": 4.064615684748866e-05,
"loss": 1.0973,
"step": 165000
},
{
"epoch": 0.19,
"learning_rate": 4.0617801213394297e-05,
"loss": 1.0788,
"step": 165500
},
{
"epoch": 0.19,
"learning_rate": 4.058944557929993e-05,
"loss": 1.0794,
"step": 166000
},
{
"epoch": 0.19,
"learning_rate": 4.0561089945205574e-05,
"loss": 1.0794,
"step": 166500
},
{
"epoch": 0.19,
"learning_rate": 4.0532734311111216e-05,
"loss": 1.0831,
"step": 167000
},
{
"epoch": 0.19,
"learning_rate": 4.050437867701685e-05,
"loss": 1.0809,
"step": 167500
},
{
"epoch": 0.19,
"learning_rate": 4.047602304292249e-05,
"loss": 1.0758,
"step": 168000
},
{
"epoch": 0.19,
"learning_rate": 4.0447667408828135e-05,
"loss": 1.094,
"step": 168500
},
{
"epoch": 0.19,
"learning_rate": 4.041931177473377e-05,
"loss": 1.0886,
"step": 169000
},
{
"epoch": 0.19,
"learning_rate": 4.0390956140639405e-05,
"loss": 1.0821,
"step": 169500
},
{
"epoch": 0.19,
"learning_rate": 4.036260050654505e-05,
"loss": 1.0738,
"step": 170000
},
{
"epoch": 0.19,
"learning_rate": 4.033424487245069e-05,
"loss": 1.0809,
"step": 170500
},
{
"epoch": 0.19,
"learning_rate": 4.030594594962452e-05,
"loss": 1.0909,
"step": 171000
},
{
"epoch": 0.19,
"learning_rate": 4.0277590315530155e-05,
"loss": 1.086,
"step": 171500
},
{
"epoch": 0.2,
"learning_rate": 4.024923468143579e-05,
"loss": 1.0798,
"step": 172000
},
{
"epoch": 0.2,
"learning_rate": 4.022087904734144e-05,
"loss": 1.0911,
"step": 172500
},
{
"epoch": 0.2,
"learning_rate": 4.0192580124515264e-05,
"loss": 1.0834,
"step": 173000
},
{
"epoch": 0.2,
"learning_rate": 4.0164224490420906e-05,
"loss": 1.0834,
"step": 173500
},
{
"epoch": 0.2,
"learning_rate": 4.013592556759473e-05,
"loss": 1.0755,
"step": 174000
},
{
"epoch": 0.2,
"learning_rate": 4.0107569933500365e-05,
"loss": 1.0855,
"step": 174500
},
{
"epoch": 0.2,
"learning_rate": 4.0079271010674196e-05,
"loss": 1.077,
"step": 175000
},
{
"epoch": 0.2,
"learning_rate": 4.005091537657983e-05,
"loss": 1.0917,
"step": 175500
},
{
"epoch": 0.2,
"learning_rate": 4.002255974248548e-05,
"loss": 1.0744,
"step": 176000
},
{
"epoch": 0.2,
"learning_rate": 3.9994204108391115e-05,
"loss": 1.0882,
"step": 176500
},
{
"epoch": 0.2,
"learning_rate": 3.996584847429675e-05,
"loss": 1.081,
"step": 177000
},
{
"epoch": 0.2,
"learning_rate": 3.993749284020239e-05,
"loss": 1.0818,
"step": 177500
},
{
"epoch": 0.2,
"learning_rate": 3.9909137206108034e-05,
"loss": 1.0766,
"step": 178000
},
{
"epoch": 0.2,
"learning_rate": 3.988078157201367e-05,
"loss": 1.0843,
"step": 178500
},
{
"epoch": 0.2,
"learning_rate": 3.985242593791931e-05,
"loss": 1.0836,
"step": 179000
},
{
"epoch": 0.2,
"learning_rate": 3.982407030382495e-05,
"loss": 1.0837,
"step": 179500
},
{
"epoch": 0.2,
"learning_rate": 3.979571466973059e-05,
"loss": 1.0808,
"step": 180000
},
{
"epoch": 0.2,
"learning_rate": 3.976735903563623e-05,
"loss": 1.0773,
"step": 180500
},
{
"epoch": 0.21,
"learning_rate": 3.9739003401541866e-05,
"loss": 1.0918,
"step": 181000
},
{
"epoch": 0.21,
"learning_rate": 3.971064776744751e-05,
"loss": 1.085,
"step": 181500
},
{
"epoch": 0.21,
"learning_rate": 3.968229213335314e-05,
"loss": 1.081,
"step": 182000
},
{
"epoch": 0.21,
"learning_rate": 3.9653936499258785e-05,
"loss": 1.0789,
"step": 182500
},
{
"epoch": 0.21,
"learning_rate": 3.962563757643261e-05,
"loss": 1.0768,
"step": 183000
},
{
"epoch": 0.21,
"learning_rate": 3.959728194233826e-05,
"loss": 1.0727,
"step": 183500
},
{
"epoch": 0.21,
"learning_rate": 3.956892630824389e-05,
"loss": 1.0834,
"step": 184000
},
{
"epoch": 0.21,
"learning_rate": 3.954057067414953e-05,
"loss": 1.0872,
"step": 184500
},
{
"epoch": 0.21,
"learning_rate": 3.951227175132336e-05,
"loss": 1.0829,
"step": 185000
},
{
"epoch": 0.21,
"learning_rate": 3.9483916117228994e-05,
"loss": 1.083,
"step": 185500
},
{
"epoch": 0.21,
"learning_rate": 3.945556048313464e-05,
"loss": 1.0741,
"step": 186000
},
{
"epoch": 0.21,
"learning_rate": 3.942720484904028e-05,
"loss": 1.0697,
"step": 186500
},
{
"epoch": 0.21,
"learning_rate": 3.93989059262141e-05,
"loss": 1.0813,
"step": 187000
},
{
"epoch": 0.21,
"learning_rate": 3.9370550292119744e-05,
"loss": 1.0761,
"step": 187500
},
{
"epoch": 0.21,
"learning_rate": 3.9342194658025386e-05,
"loss": 1.0703,
"step": 188000
},
{
"epoch": 0.21,
"learning_rate": 3.931383902393102e-05,
"loss": 1.0769,
"step": 188500
},
{
"epoch": 0.21,
"learning_rate": 3.928554010110485e-05,
"loss": 1.0751,
"step": 189000
},
{
"epoch": 0.21,
"learning_rate": 3.925718446701049e-05,
"loss": 1.0826,
"step": 189500
},
{
"epoch": 0.22,
"learning_rate": 3.922882883291613e-05,
"loss": 1.0833,
"step": 190000
},
{
"epoch": 0.22,
"learning_rate": 3.920047319882177e-05,
"loss": 1.0772,
"step": 190500
},
{
"epoch": 0.22,
"learning_rate": 3.9172174275995596e-05,
"loss": 1.086,
"step": 191000
},
{
"epoch": 0.22,
"learning_rate": 3.914381864190124e-05,
"loss": 1.0773,
"step": 191500
},
{
"epoch": 0.22,
"learning_rate": 3.911551971907506e-05,
"loss": 1.0649,
"step": 192000
},
{
"epoch": 0.22,
"learning_rate": 3.9087164084980704e-05,
"loss": 1.0756,
"step": 192500
},
{
"epoch": 0.22,
"learning_rate": 3.905880845088634e-05,
"loss": 1.0754,
"step": 193000
},
{
"epoch": 0.22,
"learning_rate": 3.903045281679198e-05,
"loss": 1.0733,
"step": 193500
},
{
"epoch": 0.22,
"learning_rate": 3.900209718269762e-05,
"loss": 1.0722,
"step": 194000
},
{
"epoch": 0.22,
"learning_rate": 3.897379825987145e-05,
"loss": 1.0726,
"step": 194500
},
{
"epoch": 0.22,
"learning_rate": 3.894549933704528e-05,
"loss": 1.0719,
"step": 195000
},
{
"epoch": 0.22,
"learning_rate": 3.891714370295091e-05,
"loss": 1.0746,
"step": 195500
},
{
"epoch": 0.22,
"learning_rate": 3.8888788068856555e-05,
"loss": 1.0777,
"step": 196000
},
{
"epoch": 0.22,
"learning_rate": 3.88604324347622e-05,
"loss": 1.0766,
"step": 196500
},
{
"epoch": 0.22,
"learning_rate": 3.883207680066783e-05,
"loss": 1.0698,
"step": 197000
},
{
"epoch": 0.22,
"learning_rate": 3.8803721166573474e-05,
"loss": 1.0802,
"step": 197500
},
{
"epoch": 0.22,
"learning_rate": 3.8775365532479116e-05,
"loss": 1.0729,
"step": 198000
},
{
"epoch": 0.23,
"learning_rate": 3.874700989838475e-05,
"loss": 1.0793,
"step": 198500
},
{
"epoch": 0.23,
"learning_rate": 3.871865426429039e-05,
"loss": 1.0716,
"step": 199000
},
{
"epoch": 0.23,
"learning_rate": 3.869029863019603e-05,
"loss": 1.0747,
"step": 199500
},
{
"epoch": 0.23,
"learning_rate": 3.866194299610167e-05,
"loss": 1.0826,
"step": 200000
},
{
"epoch": 0.23,
"learning_rate": 3.86336440732755e-05,
"loss": 1.0724,
"step": 200500
},
{
"epoch": 0.23,
"learning_rate": 3.8605345150449326e-05,
"loss": 1.067,
"step": 201000
},
{
"epoch": 0.23,
"learning_rate": 3.857698951635497e-05,
"loss": 1.0701,
"step": 201500
},
{
"epoch": 0.23,
"learning_rate": 3.85486338822606e-05,
"loss": 1.0726,
"step": 202000
},
{
"epoch": 0.23,
"learning_rate": 3.8520278248166245e-05,
"loss": 1.073,
"step": 202500
},
{
"epoch": 0.23,
"learning_rate": 3.849192261407189e-05,
"loss": 1.0718,
"step": 203000
},
{
"epoch": 0.23,
"learning_rate": 3.846356697997752e-05,
"loss": 1.0829,
"step": 203500
},
{
"epoch": 0.23,
"learning_rate": 3.843521134588316e-05,
"loss": 1.0724,
"step": 204000
},
{
"epoch": 0.23,
"learning_rate": 3.840691242305699e-05,
"loss": 1.0734,
"step": 204500
},
{
"epoch": 0.23,
"learning_rate": 3.837855678896263e-05,
"loss": 1.0741,
"step": 205000
},
{
"epoch": 0.23,
"learning_rate": 3.8350201154868265e-05,
"loss": 1.0681,
"step": 205500
},
{
"epoch": 0.23,
"learning_rate": 3.832184552077391e-05,
"loss": 1.0762,
"step": 206000
},
{
"epoch": 0.23,
"learning_rate": 3.829348988667954e-05,
"loss": 1.0755,
"step": 206500
},
{
"epoch": 0.23,
"learning_rate": 3.8265134252585185e-05,
"loss": 1.0728,
"step": 207000
},
{
"epoch": 0.24,
"learning_rate": 3.8236778618490826e-05,
"loss": 1.0674,
"step": 207500
},
{
"epoch": 0.24,
"learning_rate": 3.820842298439646e-05,
"loss": 1.0653,
"step": 208000
},
{
"epoch": 0.24,
"learning_rate": 3.8180067350302104e-05,
"loss": 1.0694,
"step": 208500
},
{
"epoch": 0.24,
"learning_rate": 3.8151768427475935e-05,
"loss": 1.0635,
"step": 209000
},
{
"epoch": 0.24,
"learning_rate": 3.812341279338157e-05,
"loss": 1.0777,
"step": 209500
},
{
"epoch": 0.24,
"learning_rate": 3.809505715928721e-05,
"loss": 1.0659,
"step": 210000
},
{
"epoch": 0.24,
"learning_rate": 3.806670152519285e-05,
"loss": 1.0675,
"step": 210500
},
{
"epoch": 0.24,
"learning_rate": 3.803834589109849e-05,
"loss": 1.0763,
"step": 211000
},
{
"epoch": 0.24,
"learning_rate": 3.8009990257004124e-05,
"loss": 1.0518,
"step": 211500
},
{
"epoch": 0.24,
"learning_rate": 3.7981634622909766e-05,
"loss": 1.0768,
"step": 212000
},
{
"epoch": 0.24,
"learning_rate": 3.795327898881541e-05,
"loss": 1.0753,
"step": 212500
},
{
"epoch": 0.24,
"learning_rate": 3.7924923354721043e-05,
"loss": 1.067,
"step": 213000
},
{
"epoch": 0.24,
"learning_rate": 3.7896624431894874e-05,
"loss": 1.0686,
"step": 213500
},
{
"epoch": 0.24,
"learning_rate": 3.786826879780051e-05,
"loss": 1.0754,
"step": 214000
},
{
"epoch": 0.24,
"learning_rate": 3.783991316370615e-05,
"loss": 1.0623,
"step": 214500
},
{
"epoch": 0.24,
"learning_rate": 3.7811557529611793e-05,
"loss": 1.0638,
"step": 215000
},
{
"epoch": 0.24,
"learning_rate": 3.778320189551743e-05,
"loss": 1.0659,
"step": 215500
},
{
"epoch": 0.24,
"learning_rate": 3.775484626142307e-05,
"loss": 1.0634,
"step": 216000
},
{
"epoch": 0.25,
"learning_rate": 3.7726490627328706e-05,
"loss": 1.0814,
"step": 216500
},
{
"epoch": 0.25,
"learning_rate": 3.769813499323435e-05,
"loss": 1.0723,
"step": 217000
},
{
"epoch": 0.25,
"learning_rate": 3.766983607040818e-05,
"loss": 1.0724,
"step": 217500
},
{
"epoch": 0.25,
"learning_rate": 3.7641480436313814e-05,
"loss": 1.0654,
"step": 218000
},
{
"epoch": 0.25,
"learning_rate": 3.761312480221945e-05,
"loss": 1.0594,
"step": 218500
},
{
"epoch": 0.25,
"learning_rate": 3.75847691681251e-05,
"loss": 1.0684,
"step": 219000
},
{
"epoch": 0.25,
"learning_rate": 3.755641353403073e-05,
"loss": 1.0603,
"step": 219500
},
{
"epoch": 0.25,
"learning_rate": 3.7528114611204564e-05,
"loss": 1.0627,
"step": 220000
},
{
"epoch": 0.25,
"learning_rate": 3.749981568837839e-05,
"loss": 1.0682,
"step": 220500
},
{
"epoch": 0.25,
"learning_rate": 3.747146005428403e-05,
"loss": 1.0624,
"step": 221000
},
{
"epoch": 0.25,
"learning_rate": 3.7443104420189665e-05,
"loss": 1.0663,
"step": 221500
},
{
"epoch": 0.25,
"learning_rate": 3.741474878609531e-05,
"loss": 1.0591,
"step": 222000
},
{
"epoch": 0.25,
"learning_rate": 3.738639315200095e-05,
"loss": 1.0656,
"step": 222500
},
{
"epoch": 0.25,
"learning_rate": 3.7358037517906585e-05,
"loss": 1.0611,
"step": 223000
},
{
"epoch": 0.25,
"learning_rate": 3.732968188381222e-05,
"loss": 1.0673,
"step": 223500
},
{
"epoch": 0.25,
"learning_rate": 3.730132624971786e-05,
"loss": 1.0745,
"step": 224000
},
{
"epoch": 0.25,
"learning_rate": 3.727302732689169e-05,
"loss": 1.0636,
"step": 224500
},
{
"epoch": 0.26,
"learning_rate": 3.724467169279733e-05,
"loss": 1.0634,
"step": 225000
},
{
"epoch": 0.26,
"learning_rate": 3.721631605870297e-05,
"loss": 1.069,
"step": 225500
},
{
"epoch": 0.26,
"learning_rate": 3.718796042460861e-05,
"loss": 1.0601,
"step": 226000
},
{
"epoch": 0.26,
"learning_rate": 3.715960479051425e-05,
"loss": 1.0631,
"step": 226500
},
{
"epoch": 0.26,
"learning_rate": 3.713124915641989e-05,
"loss": 1.07,
"step": 227000
},
{
"epoch": 0.26,
"learning_rate": 3.7102893522325524e-05,
"loss": 1.0714,
"step": 227500
},
{
"epoch": 0.26,
"learning_rate": 3.7074537888231166e-05,
"loss": 1.0717,
"step": 228000
},
{
"epoch": 0.26,
"learning_rate": 3.704618225413681e-05,
"loss": 1.0608,
"step": 228500
},
{
"epoch": 0.26,
"learning_rate": 3.7017826620042443e-05,
"loss": 1.0585,
"step": 229000
},
{
"epoch": 0.26,
"learning_rate": 3.6989527697216274e-05,
"loss": 1.0588,
"step": 229500
},
{
"epoch": 0.26,
"learning_rate": 3.69612287743901e-05,
"loss": 1.0633,
"step": 230000
},
{
"epoch": 0.26,
"learning_rate": 3.693287314029574e-05,
"loss": 1.0741,
"step": 230500
},
{
"epoch": 0.26,
"learning_rate": 3.690451750620138e-05,
"loss": 1.0622,
"step": 231000
},
{
"epoch": 0.26,
"learning_rate": 3.687616187210702e-05,
"loss": 1.0739,
"step": 231500
},
{
"epoch": 0.26,
"learning_rate": 3.684780623801265e-05,
"loss": 1.0569,
"step": 232000
},
{
"epoch": 0.26,
"learning_rate": 3.6819507315186484e-05,
"loss": 1.064,
"step": 232500
},
{
"epoch": 0.26,
"learning_rate": 3.6791151681092126e-05,
"loss": 1.0649,
"step": 233000
},
{
"epoch": 0.26,
"learning_rate": 3.6762852758265956e-05,
"loss": 1.067,
"step": 233500
},
{
"epoch": 0.27,
"learning_rate": 3.673449712417159e-05,
"loss": 1.0709,
"step": 234000
},
{
"epoch": 0.27,
"learning_rate": 3.6706141490077234e-05,
"loss": 1.0565,
"step": 234500
},
{
"epoch": 0.27,
"learning_rate": 3.667778585598287e-05,
"loss": 1.0694,
"step": 235000
},
{
"epoch": 0.27,
"learning_rate": 3.664943022188851e-05,
"loss": 1.0597,
"step": 235500
},
{
"epoch": 0.27,
"learning_rate": 3.6621074587794146e-05,
"loss": 1.0597,
"step": 236000
},
{
"epoch": 0.27,
"learning_rate": 3.659271895369979e-05,
"loss": 1.0481,
"step": 236500
},
{
"epoch": 0.27,
"learning_rate": 3.656436331960543e-05,
"loss": 1.066,
"step": 237000
},
{
"epoch": 0.27,
"learning_rate": 3.6536007685511065e-05,
"loss": 1.0637,
"step": 237500
},
{
"epoch": 0.27,
"learning_rate": 3.650765205141671e-05,
"loss": 1.0579,
"step": 238000
},
{
"epoch": 0.27,
"learning_rate": 3.647935312859053e-05,
"loss": 1.0722,
"step": 238500
},
{
"epoch": 0.27,
"learning_rate": 3.645105420576436e-05,
"loss": 1.0723,
"step": 239000
},
{
"epoch": 0.27,
"learning_rate": 3.642269857167e-05,
"loss": 1.0714,
"step": 239500
},
{
"epoch": 0.27,
"learning_rate": 3.6394342937575646e-05,
"loss": 1.0566,
"step": 240000
},
{
"epoch": 0.27,
"learning_rate": 3.636598730348128e-05,
"loss": 1.062,
"step": 240500
},
{
"epoch": 0.27,
"learning_rate": 3.633763166938692e-05,
"loss": 1.0604,
"step": 241000
},
{
"epoch": 0.27,
"learning_rate": 3.630927603529256e-05,
"loss": 1.0525,
"step": 241500
},
{
"epoch": 0.27,
"learning_rate": 3.62809204011982e-05,
"loss": 1.0616,
"step": 242000
},
{
"epoch": 0.28,
"learning_rate": 3.6252621478372025e-05,
"loss": 1.061,
"step": 242500
},
{
"epoch": 0.28,
"learning_rate": 3.622426584427767e-05,
"loss": 1.0648,
"step": 243000
},
{
"epoch": 0.28,
"learning_rate": 3.61959102101833e-05,
"loss": 1.05,
"step": 243500
},
{
"epoch": 0.28,
"learning_rate": 3.6167554576088944e-05,
"loss": 1.0543,
"step": 244000
},
{
"epoch": 0.28,
"learning_rate": 3.6139198941994586e-05,
"loss": 1.0648,
"step": 244500
},
{
"epoch": 0.28,
"learning_rate": 3.611084330790022e-05,
"loss": 1.0633,
"step": 245000
},
{
"epoch": 0.28,
"learning_rate": 3.608254438507405e-05,
"loss": 1.0491,
"step": 245500
},
{
"epoch": 0.28,
"learning_rate": 3.605418875097969e-05,
"loss": 1.056,
"step": 246000
},
{
"epoch": 0.28,
"learning_rate": 3.602583311688533e-05,
"loss": 1.0641,
"step": 246500
},
{
"epoch": 0.28,
"learning_rate": 3.599747748279097e-05,
"loss": 1.0672,
"step": 247000
},
{
"epoch": 0.28,
"learning_rate": 3.5969121848696606e-05,
"loss": 1.0645,
"step": 247500
},
{
"epoch": 0.28,
"learning_rate": 3.594082292587043e-05,
"loss": 1.0632,
"step": 248000
},
{
"epoch": 0.28,
"learning_rate": 3.591246729177607e-05,
"loss": 1.0574,
"step": 248500
},
{
"epoch": 0.28,
"learning_rate": 3.5884111657681714e-05,
"loss": 1.0641,
"step": 249000
},
{
"epoch": 0.28,
"learning_rate": 3.585575602358735e-05,
"loss": 1.0603,
"step": 249500
},
{
"epoch": 0.28,
"learning_rate": 3.582740038949299e-05,
"loss": 1.0642,
"step": 250000
},
{
"epoch": 0.28,
"learning_rate": 3.5799101466666816e-05,
"loss": 1.0594,
"step": 250500
},
{
"epoch": 0.28,
"learning_rate": 3.577074583257246e-05,
"loss": 1.0665,
"step": 251000
},
{
"epoch": 0.29,
"learning_rate": 3.57423901984781e-05,
"loss": 1.0579,
"step": 251500
},
{
"epoch": 0.29,
"learning_rate": 3.5714034564383735e-05,
"loss": 1.063,
"step": 252000
},
{
"epoch": 0.29,
"learning_rate": 3.568567893028938e-05,
"loss": 1.0591,
"step": 252500
},
{
"epoch": 0.29,
"learning_rate": 3.565732329619502e-05,
"loss": 1.0643,
"step": 253000
},
{
"epoch": 0.29,
"learning_rate": 3.5628967662100654e-05,
"loss": 1.0502,
"step": 253500
},
{
"epoch": 0.29,
"learning_rate": 3.5600725450542674e-05,
"loss": 1.0512,
"step": 254000
},
{
"epoch": 0.29,
"learning_rate": 3.557236981644831e-05,
"loss": 1.0624,
"step": 254500
},
{
"epoch": 0.29,
"learning_rate": 3.554401418235395e-05,
"loss": 1.0603,
"step": 255000
},
{
"epoch": 0.29,
"learning_rate": 3.5515658548259586e-05,
"loss": 1.0609,
"step": 255500
},
{
"epoch": 0.29,
"learning_rate": 3.548730291416523e-05,
"loss": 1.0542,
"step": 256000
},
{
"epoch": 0.29,
"learning_rate": 3.545894728007087e-05,
"loss": 1.0546,
"step": 256500
},
{
"epoch": 0.29,
"learning_rate": 3.5430591645976505e-05,
"loss": 1.0575,
"step": 257000
},
{
"epoch": 0.29,
"learning_rate": 3.540223601188215e-05,
"loss": 1.0603,
"step": 257500
},
{
"epoch": 0.29,
"learning_rate": 3.537388037778779e-05,
"loss": 1.0464,
"step": 258000
},
{
"epoch": 0.29,
"learning_rate": 3.5345581454961614e-05,
"loss": 1.0492,
"step": 258500
},
{
"epoch": 0.29,
"learning_rate": 3.5317225820867256e-05,
"loss": 1.0553,
"step": 259000
},
{
"epoch": 0.29,
"learning_rate": 3.528887018677289e-05,
"loss": 1.0577,
"step": 259500
},
{
"epoch": 0.29,
"learning_rate": 3.526051455267853e-05,
"loss": 1.0579,
"step": 260000
},
{
"epoch": 0.3,
"learning_rate": 3.523215891858417e-05,
"loss": 1.0637,
"step": 260500
},
{
"epoch": 0.3,
"learning_rate": 3.520380328448981e-05,
"loss": 1.0554,
"step": 261000
},
{
"epoch": 0.3,
"learning_rate": 3.517544765039545e-05,
"loss": 1.0501,
"step": 261500
},
{
"epoch": 0.3,
"learning_rate": 3.514709201630109e-05,
"loss": 1.0576,
"step": 262000
},
{
"epoch": 0.3,
"learning_rate": 3.511873638220673e-05,
"loss": 1.0535,
"step": 262500
},
{
"epoch": 0.3,
"learning_rate": 3.5090380748112364e-05,
"loss": 1.057,
"step": 263000
},
{
"epoch": 0.3,
"learning_rate": 3.5062025114018006e-05,
"loss": 1.0506,
"step": 263500
},
{
"epoch": 0.3,
"learning_rate": 3.503366947992365e-05,
"loss": 1.0555,
"step": 264000
},
{
"epoch": 0.3,
"learning_rate": 3.500537055709747e-05,
"loss": 1.0588,
"step": 264500
},
{
"epoch": 0.3,
"learning_rate": 3.4977014923003114e-05,
"loss": 1.0632,
"step": 265000
},
{
"epoch": 0.3,
"learning_rate": 3.494865928890875e-05,
"loss": 1.0404,
"step": 265500
},
{
"epoch": 0.3,
"learning_rate": 3.492030365481439e-05,
"loss": 1.0532,
"step": 266000
},
{
"epoch": 0.3,
"learning_rate": 3.4891948020720034e-05,
"loss": 1.0585,
"step": 266500
},
{
"epoch": 0.3,
"learning_rate": 3.486359238662567e-05,
"loss": 1.0621,
"step": 267000
},
{
"epoch": 0.3,
"learning_rate": 3.483523675253131e-05,
"loss": 1.0614,
"step": 267500
},
{
"epoch": 0.3,
"learning_rate": 3.4806881118436946e-05,
"loss": 1.0562,
"step": 268000
},
{
"epoch": 0.3,
"learning_rate": 3.477858219561078e-05,
"loss": 1.0502,
"step": 268500
},
{
"epoch": 0.31,
"learning_rate": 3.475028327278461e-05,
"loss": 1.054,
"step": 269000
},
{
"epoch": 0.31,
"learning_rate": 3.472192763869024e-05,
"loss": 1.0581,
"step": 269500
},
{
"epoch": 0.31,
"learning_rate": 3.469357200459588e-05,
"loss": 1.0417,
"step": 270000
},
{
"epoch": 0.31,
"learning_rate": 3.466521637050153e-05,
"loss": 1.0417,
"step": 270500
},
{
"epoch": 0.31,
"learning_rate": 3.463686073640716e-05,
"loss": 1.0432,
"step": 271000
},
{
"epoch": 0.31,
"learning_rate": 3.46085051023128e-05,
"loss": 1.0455,
"step": 271500
},
{
"epoch": 0.31,
"learning_rate": 3.458020617948663e-05,
"loss": 1.0507,
"step": 272000
},
{
"epoch": 0.31,
"learning_rate": 3.455185054539227e-05,
"loss": 1.0543,
"step": 272500
},
{
"epoch": 0.31,
"learning_rate": 3.4523494911297905e-05,
"loss": 1.055,
"step": 273000
},
{
"epoch": 0.31,
"learning_rate": 3.449513927720355e-05,
"loss": 1.0433,
"step": 273500
},
{
"epoch": 0.31,
"learning_rate": 3.446678364310918e-05,
"loss": 1.0495,
"step": 274000
},
{
"epoch": 0.31,
"learning_rate": 3.4438428009014825e-05,
"loss": 1.0601,
"step": 274500
},
{
"epoch": 0.31,
"learning_rate": 3.4410129086188656e-05,
"loss": 1.0542,
"step": 275000
},
{
"epoch": 0.31,
"learning_rate": 3.438177345209429e-05,
"loss": 1.0472,
"step": 275500
},
{
"epoch": 0.31,
"learning_rate": 3.435341781799993e-05,
"loss": 1.0475,
"step": 276000
},
{
"epoch": 0.31,
"learning_rate": 3.432506218390557e-05,
"loss": 1.0495,
"step": 276500
},
{
"epoch": 0.31,
"learning_rate": 3.429670654981121e-05,
"loss": 1.0514,
"step": 277000
},
{
"epoch": 0.31,
"learning_rate": 3.426835091571685e-05,
"loss": 1.0404,
"step": 277500
},
{
"epoch": 0.32,
"learning_rate": 3.423999528162249e-05,
"loss": 1.0464,
"step": 278000
},
{
"epoch": 0.32,
"learning_rate": 3.421169635879632e-05,
"loss": 1.0518,
"step": 278500
},
{
"epoch": 0.32,
"learning_rate": 3.418334072470195e-05,
"loss": 1.0479,
"step": 279000
},
{
"epoch": 0.32,
"learning_rate": 3.4154985090607595e-05,
"loss": 1.0589,
"step": 279500
},
{
"epoch": 0.32,
"learning_rate": 3.412662945651323e-05,
"loss": 1.0594,
"step": 280000
},
{
"epoch": 0.32,
"learning_rate": 3.409833053368706e-05,
"loss": 1.0479,
"step": 280500
},
{
"epoch": 0.32,
"learning_rate": 3.4069974899592696e-05,
"loss": 1.0449,
"step": 281000
},
{
"epoch": 0.32,
"learning_rate": 3.4041619265498345e-05,
"loss": 1.0561,
"step": 281500
},
{
"epoch": 0.32,
"learning_rate": 3.401326363140398e-05,
"loss": 1.0455,
"step": 282000
},
{
"epoch": 0.32,
"learning_rate": 3.3984907997309616e-05,
"loss": 1.051,
"step": 282500
},
{
"epoch": 0.32,
"learning_rate": 3.395655236321526e-05,
"loss": 1.0445,
"step": 283000
},
{
"epoch": 0.32,
"learning_rate": 3.39281967291209e-05,
"loss": 1.0516,
"step": 283500
},
{
"epoch": 0.32,
"learning_rate": 3.3899841095026535e-05,
"loss": 1.0578,
"step": 284000
},
{
"epoch": 0.32,
"learning_rate": 3.3871542172200366e-05,
"loss": 1.0471,
"step": 284500
},
{
"epoch": 0.32,
"learning_rate": 3.3843186538106e-05,
"loss": 1.0518,
"step": 285000
},
{
"epoch": 0.32,
"learning_rate": 3.381488761527983e-05,
"loss": 1.0426,
"step": 285500
},
{
"epoch": 0.32,
"learning_rate": 3.3786531981185474e-05,
"loss": 1.0514,
"step": 286000
},
{
"epoch": 0.32,
"learning_rate": 3.375817634709111e-05,
"loss": 1.0381,
"step": 286500
},
{
"epoch": 0.33,
"learning_rate": 3.372982071299675e-05,
"loss": 1.0435,
"step": 287000
},
{
"epoch": 0.33,
"learning_rate": 3.3701465078902386e-05,
"loss": 1.0402,
"step": 287500
},
{
"epoch": 0.33,
"learning_rate": 3.367310944480803e-05,
"loss": 1.0431,
"step": 288000
},
{
"epoch": 0.33,
"learning_rate": 3.364475381071367e-05,
"loss": 1.0354,
"step": 288500
},
{
"epoch": 0.33,
"learning_rate": 3.3616398176619305e-05,
"loss": 1.0472,
"step": 289000
},
{
"epoch": 0.33,
"learning_rate": 3.3588099253793136e-05,
"loss": 1.0379,
"step": 289500
},
{
"epoch": 0.33,
"learning_rate": 3.355974361969877e-05,
"loss": 1.0459,
"step": 290000
},
{
"epoch": 0.33,
"learning_rate": 3.3531387985604414e-05,
"loss": 1.0439,
"step": 290500
},
{
"epoch": 0.33,
"learning_rate": 3.3503032351510056e-05,
"loss": 1.0515,
"step": 291000
},
{
"epoch": 0.33,
"learning_rate": 3.347467671741569e-05,
"loss": 1.0502,
"step": 291500
},
{
"epoch": 0.33,
"learning_rate": 3.344632108332133e-05,
"loss": 1.0481,
"step": 292000
},
{
"epoch": 0.33,
"learning_rate": 3.341796544922697e-05,
"loss": 1.0388,
"step": 292500
},
{
"epoch": 0.33,
"learning_rate": 3.338960981513261e-05,
"loss": 1.0446,
"step": 293000
},
{
"epoch": 0.33,
"learning_rate": 3.3361310892306434e-05,
"loss": 1.0507,
"step": 293500
},
{
"epoch": 0.33,
"learning_rate": 3.3332955258212076e-05,
"loss": 1.0462,
"step": 294000
},
{
"epoch": 0.33,
"learning_rate": 3.330459962411772e-05,
"loss": 1.0454,
"step": 294500
},
{
"epoch": 0.33,
"learning_rate": 3.327624399002335e-05,
"loss": 1.0449,
"step": 295000
},
{
"epoch": 0.34,
"learning_rate": 3.3247888355928995e-05,
"loss": 1.0383,
"step": 295500
},
{
"epoch": 0.34,
"learning_rate": 3.321953272183464e-05,
"loss": 1.055,
"step": 296000
},
{
"epoch": 0.34,
"learning_rate": 3.319117708774027e-05,
"loss": 1.0455,
"step": 296500
},
{
"epoch": 0.34,
"learning_rate": 3.3162821453645914e-05,
"loss": 1.0551,
"step": 297000
},
{
"epoch": 0.34,
"learning_rate": 3.313446581955155e-05,
"loss": 1.0504,
"step": 297500
},
{
"epoch": 0.34,
"learning_rate": 3.3106166896725374e-05,
"loss": 1.0321,
"step": 298000
},
{
"epoch": 0.34,
"learning_rate": 3.307781126263102e-05,
"loss": 1.0465,
"step": 298500
},
{
"epoch": 0.34,
"learning_rate": 3.304945562853666e-05,
"loss": 1.0394,
"step": 299000
},
{
"epoch": 0.34,
"learning_rate": 3.302109999444229e-05,
"loss": 1.0492,
"step": 299500
},
{
"epoch": 0.34,
"learning_rate": 3.2992801071616124e-05,
"loss": 1.047,
"step": 300000
},
{
"epoch": 0.34,
"learning_rate": 3.2964445437521766e-05,
"loss": 1.0467,
"step": 300500
},
{
"epoch": 0.34,
"learning_rate": 3.293614651469559e-05,
"loss": 1.0346,
"step": 301000
},
{
"epoch": 0.34,
"learning_rate": 3.290779088060123e-05,
"loss": 1.0463,
"step": 301500
},
{
"epoch": 0.34,
"learning_rate": 3.2879435246506874e-05,
"loss": 1.0604,
"step": 302000
},
{
"epoch": 0.34,
"learning_rate": 3.285107961241251e-05,
"loss": 1.0509,
"step": 302500
},
{
"epoch": 0.34,
"learning_rate": 3.282272397831815e-05,
"loss": 1.0409,
"step": 303000
},
{
"epoch": 0.34,
"learning_rate": 3.279442505549198e-05,
"loss": 1.046,
"step": 303500
},
{
"epoch": 0.34,
"learning_rate": 3.276606942139762e-05,
"loss": 1.0413,
"step": 304000
},
{
"epoch": 0.35,
"learning_rate": 3.273777049857145e-05,
"loss": 1.0476,
"step": 304500
},
{
"epoch": 0.35,
"learning_rate": 3.270941486447708e-05,
"loss": 1.0441,
"step": 305000
},
{
"epoch": 0.35,
"learning_rate": 3.268105923038272e-05,
"loss": 1.041,
"step": 305500
},
{
"epoch": 0.35,
"learning_rate": 3.265270359628837e-05,
"loss": 1.0403,
"step": 306000
},
{
"epoch": 0.35,
"learning_rate": 3.2624347962194e-05,
"loss": 1.0397,
"step": 306500
},
{
"epoch": 0.35,
"learning_rate": 3.259599232809964e-05,
"loss": 1.0371,
"step": 307000
},
{
"epoch": 0.35,
"learning_rate": 3.256769340527347e-05,
"loss": 1.0354,
"step": 307500
},
{
"epoch": 0.35,
"learning_rate": 3.2539337771179104e-05,
"loss": 1.0436,
"step": 308000
},
{
"epoch": 0.35,
"learning_rate": 3.251098213708475e-05,
"loss": 1.0415,
"step": 308500
},
{
"epoch": 0.35,
"learning_rate": 3.248262650299039e-05,
"loss": 1.0357,
"step": 309000
},
{
"epoch": 0.35,
"learning_rate": 3.245427086889602e-05,
"loss": 1.0567,
"step": 309500
},
{
"epoch": 0.35,
"learning_rate": 3.2425915234801665e-05,
"loss": 1.0387,
"step": 310000
},
{
"epoch": 0.35,
"learning_rate": 3.239755960070731e-05,
"loss": 1.0362,
"step": 310500
},
{
"epoch": 0.35,
"learning_rate": 3.236920396661294e-05,
"loss": 1.0497,
"step": 311000
},
{
"epoch": 0.35,
"learning_rate": 3.2340848332518584e-05,
"loss": 1.0333,
"step": 311500
},
{
"epoch": 0.35,
"learning_rate": 3.231254940969241e-05,
"loss": 1.0323,
"step": 312000
},
{
"epoch": 0.35,
"learning_rate": 3.228419377559805e-05,
"loss": 1.0385,
"step": 312500
},
{
"epoch": 0.36,
"learning_rate": 3.225583814150369e-05,
"loss": 1.0244,
"step": 313000
},
{
"epoch": 0.36,
"learning_rate": 3.222748250740933e-05,
"loss": 1.0379,
"step": 313500
},
{
"epoch": 0.36,
"learning_rate": 3.219912687331497e-05,
"loss": 1.0411,
"step": 314000
},
{
"epoch": 0.36,
"learning_rate": 3.217077123922061e-05,
"loss": 1.0482,
"step": 314500
},
{
"epoch": 0.36,
"learning_rate": 3.2142472316394435e-05,
"loss": 1.0341,
"step": 315000
},
{
"epoch": 0.36,
"learning_rate": 3.211411668230008e-05,
"loss": 1.0411,
"step": 315500
},
{
"epoch": 0.36,
"learning_rate": 3.208576104820571e-05,
"loss": 1.0341,
"step": 316000
},
{
"epoch": 0.36,
"learning_rate": 3.2057405414111355e-05,
"loss": 1.0346,
"step": 316500
},
{
"epoch": 0.36,
"learning_rate": 3.202904978001699e-05,
"loss": 1.0438,
"step": 317000
},
{
"epoch": 0.36,
"learning_rate": 3.200069414592263e-05,
"loss": 1.0415,
"step": 317500
},
{
"epoch": 0.36,
"learning_rate": 3.197233851182827e-05,
"loss": 1.0418,
"step": 318000
},
{
"epoch": 0.36,
"learning_rate": 3.194398287773391e-05,
"loss": 1.0355,
"step": 318500
},
{
"epoch": 0.36,
"learning_rate": 3.191562724363955e-05,
"loss": 1.0396,
"step": 319000
},
{
"epoch": 0.36,
"learning_rate": 3.1887271609545186e-05,
"loss": 1.0383,
"step": 319500
},
{
"epoch": 0.36,
"learning_rate": 3.185897268671902e-05,
"loss": 1.0381,
"step": 320000
},
{
"epoch": 0.36,
"learning_rate": 3.183061705262466e-05,
"loss": 1.0418,
"step": 320500
},
{
"epoch": 0.36,
"learning_rate": 3.180231812979848e-05,
"loss": 1.0422,
"step": 321000
},
{
"epoch": 0.36,
"learning_rate": 3.1773962495704125e-05,
"loss": 1.0352,
"step": 321500
},
{
"epoch": 0.37,
"learning_rate": 3.174560686160976e-05,
"loss": 1.0425,
"step": 322000
},
{
"epoch": 0.37,
"learning_rate": 3.1717251227515396e-05,
"loss": 1.0394,
"step": 322500
},
{
"epoch": 0.37,
"learning_rate": 3.1688952304689226e-05,
"loss": 1.034,
"step": 323000
},
{
"epoch": 0.37,
"learning_rate": 3.166059667059487e-05,
"loss": 1.0391,
"step": 323500
},
{
"epoch": 0.37,
"learning_rate": 3.163224103650051e-05,
"loss": 1.0363,
"step": 324000
},
{
"epoch": 0.37,
"learning_rate": 3.1603885402406146e-05,
"loss": 1.0363,
"step": 324500
},
{
"epoch": 0.37,
"learning_rate": 3.157552976831179e-05,
"loss": 1.0355,
"step": 325000
},
{
"epoch": 0.37,
"learning_rate": 3.154717413421743e-05,
"loss": 1.0344,
"step": 325500
},
{
"epoch": 0.37,
"learning_rate": 3.1518818500123065e-05,
"loss": 1.0348,
"step": 326000
},
{
"epoch": 0.37,
"learning_rate": 3.1490519577296896e-05,
"loss": 1.0304,
"step": 326500
},
{
"epoch": 0.37,
"learning_rate": 3.146216394320253e-05,
"loss": 1.0398,
"step": 327000
},
{
"epoch": 0.37,
"learning_rate": 3.143386502037636e-05,
"loss": 1.0305,
"step": 327500
},
{
"epoch": 0.37,
"learning_rate": 3.1405509386282004e-05,
"loss": 1.0399,
"step": 328000
},
{
"epoch": 0.37,
"learning_rate": 3.137715375218764e-05,
"loss": 1.0438,
"step": 328500
},
{
"epoch": 0.37,
"learning_rate": 3.1348798118093274e-05,
"loss": 1.0422,
"step": 329000
},
{
"epoch": 0.37,
"learning_rate": 3.1320442483998916e-05,
"loss": 1.0397,
"step": 329500
},
{
"epoch": 0.37,
"learning_rate": 3.129208684990456e-05,
"loss": 1.034,
"step": 330000
},
{
"epoch": 0.37,
"learning_rate": 3.1263731215810193e-05,
"loss": 1.0411,
"step": 330500
},
{
"epoch": 0.38,
"learning_rate": 3.1235375581715835e-05,
"loss": 1.0361,
"step": 331000
},
{
"epoch": 0.38,
"learning_rate": 3.120707665888966e-05,
"loss": 1.0304,
"step": 331500
},
{
"epoch": 0.38,
"learning_rate": 3.11787210247953e-05,
"loss": 1.0402,
"step": 332000
},
{
"epoch": 0.38,
"learning_rate": 3.1150365390700943e-05,
"loss": 1.0353,
"step": 332500
},
{
"epoch": 0.38,
"learning_rate": 3.112200975660658e-05,
"loss": 1.0403,
"step": 333000
},
{
"epoch": 0.38,
"learning_rate": 3.109365412251222e-05,
"loss": 1.0377,
"step": 333500
},
{
"epoch": 0.38,
"learning_rate": 3.106529848841786e-05,
"loss": 1.0377,
"step": 334000
},
{
"epoch": 0.38,
"learning_rate": 3.10369428543235e-05,
"loss": 1.0374,
"step": 334500
},
{
"epoch": 0.38,
"learning_rate": 3.100858722022913e-05,
"loss": 1.0323,
"step": 335000
},
{
"epoch": 0.38,
"learning_rate": 3.0980288297402964e-05,
"loss": 1.0321,
"step": 335500
},
{
"epoch": 0.38,
"learning_rate": 3.0951932663308606e-05,
"loss": 1.0294,
"step": 336000
},
{
"epoch": 0.38,
"learning_rate": 3.092357702921425e-05,
"loss": 1.0446,
"step": 336500
},
{
"epoch": 0.38,
"learning_rate": 3.089522139511988e-05,
"loss": 1.0369,
"step": 337000
},
{
"epoch": 0.38,
"learning_rate": 3.0866922472293714e-05,
"loss": 1.0385,
"step": 337500
},
{
"epoch": 0.38,
"learning_rate": 3.083856683819935e-05,
"loss": 1.0312,
"step": 338000
},
{
"epoch": 0.38,
"learning_rate": 3.081021120410499e-05,
"loss": 1.0368,
"step": 338500
},
{
"epoch": 0.38,
"learning_rate": 3.078185557001063e-05,
"loss": 1.0346,
"step": 339000
},
{
"epoch": 0.39,
"learning_rate": 3.075349993591627e-05,
"loss": 1.0374,
"step": 339500
},
{
"epoch": 0.39,
"learning_rate": 3.0725144301821904e-05,
"loss": 1.0403,
"step": 340000
},
{
"epoch": 0.39,
"learning_rate": 3.069678866772755e-05,
"loss": 1.0405,
"step": 340500
},
{
"epoch": 0.39,
"learning_rate": 3.066843303363319e-05,
"loss": 1.0413,
"step": 341000
},
{
"epoch": 0.39,
"learning_rate": 3.064013411080701e-05,
"loss": 1.0441,
"step": 341500
},
{
"epoch": 0.39,
"learning_rate": 3.0611778476712654e-05,
"loss": 1.0337,
"step": 342000
},
{
"epoch": 0.39,
"learning_rate": 3.058342284261829e-05,
"loss": 1.0377,
"step": 342500
},
{
"epoch": 0.39,
"learning_rate": 3.055506720852393e-05,
"loss": 1.0303,
"step": 343000
},
{
"epoch": 0.39,
"learning_rate": 3.052671157442957e-05,
"loss": 1.0263,
"step": 343500
},
{
"epoch": 0.39,
"learning_rate": 3.049835594033521e-05,
"loss": 1.027,
"step": 344000
},
{
"epoch": 0.39,
"learning_rate": 3.047005701750904e-05,
"loss": 1.0238,
"step": 344500
},
{
"epoch": 0.39,
"learning_rate": 3.0441701383414678e-05,
"loss": 1.0358,
"step": 345000
},
{
"epoch": 0.39,
"learning_rate": 3.0413345749320316e-05,
"loss": 1.0365,
"step": 345500
},
{
"epoch": 0.39,
"learning_rate": 3.0384990115225958e-05,
"loss": 1.031,
"step": 346000
},
{
"epoch": 0.39,
"learning_rate": 3.0356634481131597e-05,
"loss": 1.0359,
"step": 346500
},
{
"epoch": 0.39,
"learning_rate": 3.0328278847037232e-05,
"loss": 1.0326,
"step": 347000
},
{
"epoch": 0.39,
"learning_rate": 3.029992321294287e-05,
"loss": 1.0333,
"step": 347500
},
{
"epoch": 0.39,
"learning_rate": 3.0271567578848513e-05,
"loss": 1.0333,
"step": 348000
},
{
"epoch": 0.4,
"learning_rate": 3.024321194475415e-05,
"loss": 1.0384,
"step": 348500
},
{
"epoch": 0.4,
"learning_rate": 3.0214913021927982e-05,
"loss": 1.0247,
"step": 349000
},
{
"epoch": 0.4,
"learning_rate": 3.018655738783362e-05,
"loss": 1.0285,
"step": 349500
},
{
"epoch": 0.4,
"learning_rate": 3.0158201753739256e-05,
"loss": 1.0348,
"step": 350000
},
{
"epoch": 0.4,
"learning_rate": 3.0129902830913087e-05,
"loss": 1.0334,
"step": 350500
},
{
"epoch": 0.4,
"learning_rate": 3.0101547196818725e-05,
"loss": 1.0232,
"step": 351000
},
{
"epoch": 0.4,
"learning_rate": 3.0073191562724367e-05,
"loss": 1.0332,
"step": 351500
},
{
"epoch": 0.4,
"learning_rate": 3.0044835928630006e-05,
"loss": 1.0291,
"step": 352000
},
{
"epoch": 0.4,
"learning_rate": 3.001648029453564e-05,
"loss": 1.0249,
"step": 352500
},
{
"epoch": 0.4,
"learning_rate": 2.9988181371709472e-05,
"loss": 1.0328,
"step": 353000
},
{
"epoch": 0.4,
"learning_rate": 2.995982573761511e-05,
"loss": 1.0321,
"step": 353500
},
{
"epoch": 0.4,
"learning_rate": 2.993147010352075e-05,
"loss": 1.0271,
"step": 354000
},
{
"epoch": 0.4,
"learning_rate": 2.9903171180694577e-05,
"loss": 1.0408,
"step": 354500
},
{
"epoch": 0.4,
"learning_rate": 2.9874872257868407e-05,
"loss": 1.0263,
"step": 355000
},
{
"epoch": 0.4,
"learning_rate": 2.9846516623774046e-05,
"loss": 1.0365,
"step": 355500
},
{
"epoch": 0.4,
"learning_rate": 2.9818160989679685e-05,
"loss": 1.0225,
"step": 356000
},
{
"epoch": 0.4,
"learning_rate": 2.9789805355585327e-05,
"loss": 1.0367,
"step": 356500
},
{
"epoch": 0.4,
"learning_rate": 2.9761449721490965e-05,
"loss": 1.0261,
"step": 357000
},
{
"epoch": 0.41,
"learning_rate": 2.97330940873966e-05,
"loss": 1.0369,
"step": 357500
},
{
"epoch": 0.41,
"learning_rate": 2.970479516457043e-05,
"loss": 1.0264,
"step": 358000
},
{
"epoch": 0.41,
"learning_rate": 2.967643953047607e-05,
"loss": 1.0348,
"step": 358500
},
{
"epoch": 0.41,
"learning_rate": 2.9648083896381705e-05,
"loss": 1.0281,
"step": 359000
},
{
"epoch": 0.41,
"learning_rate": 2.961972826228735e-05,
"loss": 1.0275,
"step": 359500
},
{
"epoch": 0.41,
"learning_rate": 2.9591372628192986e-05,
"loss": 1.0419,
"step": 360000
},
{
"epoch": 0.41,
"learning_rate": 2.9563016994098624e-05,
"loss": 1.0338,
"step": 360500
},
{
"epoch": 0.41,
"learning_rate": 2.9534661360004266e-05,
"loss": 1.0356,
"step": 361000
},
{
"epoch": 0.41,
"learning_rate": 2.9506305725909905e-05,
"loss": 1.0257,
"step": 361500
},
{
"epoch": 0.41,
"learning_rate": 2.9478006803083736e-05,
"loss": 1.0314,
"step": 362000
},
{
"epoch": 0.41,
"learning_rate": 2.9449651168989374e-05,
"loss": 1.0287,
"step": 362500
},
{
"epoch": 0.41,
"learning_rate": 2.942129553489501e-05,
"loss": 1.0174,
"step": 363000
},
{
"epoch": 0.41,
"learning_rate": 2.9392939900800655e-05,
"loss": 1.0214,
"step": 363500
},
{
"epoch": 0.41,
"learning_rate": 2.936458426670629e-05,
"loss": 1.0258,
"step": 364000
},
{
"epoch": 0.41,
"learning_rate": 2.933622863261193e-05,
"loss": 1.021,
"step": 364500
},
{
"epoch": 0.41,
"learning_rate": 2.930787299851757e-05,
"loss": 1.0231,
"step": 365000
},
{
"epoch": 0.41,
"learning_rate": 2.9279574075691395e-05,
"loss": 1.0261,
"step": 365500
},
{
"epoch": 0.42,
"learning_rate": 2.9251218441597034e-05,
"loss": 1.0275,
"step": 366000
},
{
"epoch": 0.42,
"learning_rate": 2.9222862807502676e-05,
"loss": 1.0323,
"step": 366500
},
{
"epoch": 0.42,
"learning_rate": 2.9194507173408314e-05,
"loss": 1.0314,
"step": 367000
},
{
"epoch": 0.42,
"learning_rate": 2.9166151539313953e-05,
"loss": 1.026,
"step": 367500
},
{
"epoch": 0.42,
"learning_rate": 2.913785261648778e-05,
"loss": 1.0383,
"step": 368000
},
{
"epoch": 0.42,
"learning_rate": 2.910949698239342e-05,
"loss": 1.0296,
"step": 368500
},
{
"epoch": 0.42,
"learning_rate": 2.908114134829906e-05,
"loss": 1.0215,
"step": 369000
},
{
"epoch": 0.42,
"learning_rate": 2.90527857142047e-05,
"loss": 1.0247,
"step": 369500
},
{
"epoch": 0.42,
"learning_rate": 2.9024430080110338e-05,
"loss": 1.0266,
"step": 370000
},
{
"epoch": 0.42,
"learning_rate": 2.899607444601598e-05,
"loss": 1.0238,
"step": 370500
},
{
"epoch": 0.42,
"learning_rate": 2.896771881192162e-05,
"loss": 1.0221,
"step": 371000
},
{
"epoch": 0.42,
"learning_rate": 2.8939363177827257e-05,
"loss": 1.0131,
"step": 371500
},
{
"epoch": 0.42,
"learning_rate": 2.8911007543732892e-05,
"loss": 1.0217,
"step": 372000
},
{
"epoch": 0.42,
"learning_rate": 2.8882708620906723e-05,
"loss": 1.0158,
"step": 372500
},
{
"epoch": 0.42,
"learning_rate": 2.8854352986812362e-05,
"loss": 1.0243,
"step": 373000
},
{
"epoch": 0.42,
"learning_rate": 2.882605406398619e-05,
"loss": 1.0365,
"step": 373500
},
{
"epoch": 0.42,
"learning_rate": 2.8797698429891828e-05,
"loss": 1.0321,
"step": 374000
},
{
"epoch": 0.42,
"learning_rate": 2.876934279579747e-05,
"loss": 1.0244,
"step": 374500
},
{
"epoch": 0.43,
"learning_rate": 2.874098716170311e-05,
"loss": 1.0297,
"step": 375000
},
{
"epoch": 0.43,
"learning_rate": 2.8712631527608747e-05,
"loss": 1.0203,
"step": 375500
},
{
"epoch": 0.43,
"learning_rate": 2.8684332604782578e-05,
"loss": 1.0329,
"step": 376000
},
{
"epoch": 0.43,
"learning_rate": 2.8655976970688213e-05,
"loss": 1.0226,
"step": 376500
},
{
"epoch": 0.43,
"learning_rate": 2.862762133659386e-05,
"loss": 1.0218,
"step": 377000
},
{
"epoch": 0.43,
"learning_rate": 2.8599265702499494e-05,
"loss": 1.035,
"step": 377500
},
{
"epoch": 0.43,
"learning_rate": 2.8570910068405132e-05,
"loss": 1.0228,
"step": 378000
},
{
"epoch": 0.43,
"learning_rate": 2.854255443431077e-05,
"loss": 1.0219,
"step": 378500
},
{
"epoch": 0.43,
"learning_rate": 2.85142555114846e-05,
"loss": 1.0242,
"step": 379000
},
{
"epoch": 0.43,
"learning_rate": 2.8485899877390237e-05,
"loss": 1.0232,
"step": 379500
},
{
"epoch": 0.43,
"learning_rate": 2.845754424329588e-05,
"loss": 1.026,
"step": 380000
},
{
"epoch": 0.43,
"learning_rate": 2.8429188609201518e-05,
"loss": 1.0204,
"step": 380500
},
{
"epoch": 0.43,
"learning_rate": 2.8400832975107156e-05,
"loss": 1.0309,
"step": 381000
},
{
"epoch": 0.43,
"learning_rate": 2.83724773410128e-05,
"loss": 1.0229,
"step": 381500
},
{
"epoch": 0.43,
"learning_rate": 2.8344121706918437e-05,
"loss": 1.0236,
"step": 382000
},
{
"epoch": 0.43,
"learning_rate": 2.8315766072824072e-05,
"loss": 1.0157,
"step": 382500
},
{
"epoch": 0.43,
"learning_rate": 2.8287410438729718e-05,
"loss": 1.0271,
"step": 383000
},
{
"epoch": 0.43,
"learning_rate": 2.8259054804635353e-05,
"loss": 1.0162,
"step": 383500
},
{
"epoch": 0.44,
"learning_rate": 2.823075588180918e-05,
"loss": 1.0247,
"step": 384000
},
{
"epoch": 0.44,
"learning_rate": 2.8202400247714822e-05,
"loss": 1.0214,
"step": 384500
},
{
"epoch": 0.44,
"learning_rate": 2.817404461362046e-05,
"loss": 1.0247,
"step": 385000
},
{
"epoch": 0.44,
"learning_rate": 2.8145688979526096e-05,
"loss": 1.0257,
"step": 385500
},
{
"epoch": 0.44,
"learning_rate": 2.811733334543174e-05,
"loss": 1.0177,
"step": 386000
},
{
"epoch": 0.44,
"learning_rate": 2.8088977711337377e-05,
"loss": 1.0214,
"step": 386500
},
{
"epoch": 0.44,
"learning_rate": 2.8060622077243015e-05,
"loss": 1.0279,
"step": 387000
},
{
"epoch": 0.44,
"learning_rate": 2.8032323154416846e-05,
"loss": 1.0213,
"step": 387500
},
{
"epoch": 0.44,
"learning_rate": 2.800396752032248e-05,
"loss": 1.0162,
"step": 388000
},
{
"epoch": 0.44,
"learning_rate": 2.7975611886228127e-05,
"loss": 1.028,
"step": 388500
},
{
"epoch": 0.44,
"learning_rate": 2.7947256252133762e-05,
"loss": 1.0257,
"step": 389000
},
{
"epoch": 0.44,
"learning_rate": 2.79189006180394e-05,
"loss": 1.0182,
"step": 389500
},
{
"epoch": 0.44,
"learning_rate": 2.789060169521323e-05,
"loss": 1.0284,
"step": 390000
},
{
"epoch": 0.44,
"learning_rate": 2.786224606111887e-05,
"loss": 1.0254,
"step": 390500
},
{
"epoch": 0.44,
"learning_rate": 2.7833890427024505e-05,
"loss": 1.0226,
"step": 391000
},
{
"epoch": 0.44,
"learning_rate": 2.780553479293015e-05,
"loss": 1.0333,
"step": 391500
},
{
"epoch": 0.44,
"learning_rate": 2.7777179158835786e-05,
"loss": 1.0234,
"step": 392000
},
{
"epoch": 0.45,
"learning_rate": 2.7748823524741424e-05,
"loss": 1.0208,
"step": 392500
},
{
"epoch": 0.45,
"learning_rate": 2.7720467890647066e-05,
"loss": 1.0113,
"step": 393000
},
{
"epoch": 0.45,
"learning_rate": 2.7692112256552705e-05,
"loss": 1.0204,
"step": 393500
},
{
"epoch": 0.45,
"learning_rate": 2.7663813333726536e-05,
"loss": 1.0221,
"step": 394000
},
{
"epoch": 0.45,
"learning_rate": 2.763545769963217e-05,
"loss": 1.0142,
"step": 394500
},
{
"epoch": 0.45,
"learning_rate": 2.760710206553781e-05,
"loss": 1.0206,
"step": 395000
},
{
"epoch": 0.45,
"learning_rate": 2.7578746431443452e-05,
"loss": 1.0245,
"step": 395500
},
{
"epoch": 0.45,
"learning_rate": 2.755039079734909e-05,
"loss": 1.0211,
"step": 396000
},
{
"epoch": 0.45,
"learning_rate": 2.7522091874522914e-05,
"loss": 1.0229,
"step": 396500
},
{
"epoch": 0.45,
"learning_rate": 2.749373624042856e-05,
"loss": 1.0236,
"step": 397000
},
{
"epoch": 0.45,
"learning_rate": 2.7465380606334195e-05,
"loss": 1.0234,
"step": 397500
},
{
"epoch": 0.45,
"learning_rate": 2.7437081683508026e-05,
"loss": 1.0249,
"step": 398000
},
{
"epoch": 0.45,
"learning_rate": 2.7408726049413664e-05,
"loss": 1.0188,
"step": 398500
},
{
"epoch": 0.45,
"learning_rate": 2.7380427126587492e-05,
"loss": 1.02,
"step": 399000
},
{
"epoch": 0.45,
"learning_rate": 2.735207149249313e-05,
"loss": 1.0236,
"step": 399500
},
{
"epoch": 0.45,
"learning_rate": 2.732371585839877e-05,
"loss": 1.0245,
"step": 400000
},
{
"epoch": 0.45,
"learning_rate": 2.729536022430441e-05,
"loss": 1.0333,
"step": 400500
},
{
"epoch": 0.45,
"learning_rate": 2.726700459021005e-05,
"loss": 1.0197,
"step": 401000
},
{
"epoch": 0.46,
"learning_rate": 2.7238648956115685e-05,
"loss": 1.0254,
"step": 401500
},
{
"epoch": 0.46,
"learning_rate": 2.721029332202133e-05,
"loss": 1.0158,
"step": 402000
},
{
"epoch": 0.46,
"learning_rate": 2.7181937687926966e-05,
"loss": 1.0247,
"step": 402500
},
{
"epoch": 0.46,
"learning_rate": 2.7153582053832604e-05,
"loss": 1.0168,
"step": 403000
},
{
"epoch": 0.46,
"learning_rate": 2.7125226419738243e-05,
"loss": 1.0238,
"step": 403500
},
{
"epoch": 0.46,
"learning_rate": 2.7096870785643885e-05,
"loss": 1.0162,
"step": 404000
},
{
"epoch": 0.46,
"learning_rate": 2.7068515151549523e-05,
"loss": 1.0162,
"step": 404500
},
{
"epoch": 0.46,
"learning_rate": 2.7040216228723354e-05,
"loss": 1.0237,
"step": 405000
},
{
"epoch": 0.46,
"learning_rate": 2.701186059462899e-05,
"loss": 1.0313,
"step": 405500
},
{
"epoch": 0.46,
"learning_rate": 2.6983504960534628e-05,
"loss": 1.0143,
"step": 406000
},
{
"epoch": 0.46,
"learning_rate": 2.695514932644027e-05,
"loss": 1.024,
"step": 406500
},
{
"epoch": 0.46,
"learning_rate": 2.692679369234591e-05,
"loss": 1.0265,
"step": 407000
},
{
"epoch": 0.46,
"learning_rate": 2.6898438058251547e-05,
"loss": 1.0232,
"step": 407500
},
{
"epoch": 0.46,
"learning_rate": 2.6870139135425375e-05,
"loss": 1.0174,
"step": 408000
},
{
"epoch": 0.46,
"learning_rate": 2.6841840212599202e-05,
"loss": 1.0099,
"step": 408500
},
{
"epoch": 0.46,
"learning_rate": 2.6813484578504844e-05,
"loss": 1.0173,
"step": 409000
},
{
"epoch": 0.46,
"learning_rate": 2.6785128944410483e-05,
"loss": 1.0189,
"step": 409500
},
{
"epoch": 0.47,
"learning_rate": 2.6756773310316118e-05,
"loss": 1.0187,
"step": 410000
},
{
"epoch": 0.47,
"learning_rate": 2.672847438748995e-05,
"loss": 1.0174,
"step": 410500
},
{
"epoch": 0.47,
"learning_rate": 2.6700118753395587e-05,
"loss": 1.018,
"step": 411000
},
{
"epoch": 0.47,
"learning_rate": 2.667176311930123e-05,
"loss": 1.0273,
"step": 411500
},
{
"epoch": 0.47,
"learning_rate": 2.6643407485206868e-05,
"loss": 1.0175,
"step": 412000
},
{
"epoch": 0.47,
"learning_rate": 2.6615051851112503e-05,
"loss": 1.0088,
"step": 412500
},
{
"epoch": 0.47,
"learning_rate": 2.658669621701815e-05,
"loss": 1.0083,
"step": 413000
},
{
"epoch": 0.47,
"learning_rate": 2.6558340582923784e-05,
"loss": 1.0152,
"step": 413500
},
{
"epoch": 0.47,
"learning_rate": 2.6529984948829422e-05,
"loss": 1.0096,
"step": 414000
},
{
"epoch": 0.47,
"learning_rate": 2.6501629314735064e-05,
"loss": 1.0183,
"step": 414500
},
{
"epoch": 0.47,
"learning_rate": 2.6473273680640703e-05,
"loss": 1.0206,
"step": 415000
},
{
"epoch": 0.47,
"learning_rate": 2.644491804654634e-05,
"loss": 1.0195,
"step": 415500
},
{
"epoch": 0.47,
"learning_rate": 2.6416562412451977e-05,
"loss": 1.015,
"step": 416000
},
{
"epoch": 0.47,
"learning_rate": 2.6388263489625808e-05,
"loss": 1.0138,
"step": 416500
},
{
"epoch": 0.47,
"learning_rate": 2.6359907855531446e-05,
"loss": 1.0162,
"step": 417000
},
{
"epoch": 0.47,
"learning_rate": 2.6331552221437088e-05,
"loss": 1.0137,
"step": 417500
},
{
"epoch": 0.47,
"learning_rate": 2.6303196587342727e-05,
"loss": 1.0178,
"step": 418000
},
{
"epoch": 0.47,
"learning_rate": 2.6274897664516558e-05,
"loss": 1.0175,
"step": 418500
},
{
"epoch": 0.48,
"learning_rate": 2.6246598741690382e-05,
"loss": 1.0197,
"step": 419000
},
{
"epoch": 0.48,
"learning_rate": 2.6218243107596024e-05,
"loss": 1.0193,
"step": 419500
},
{
"epoch": 0.48,
"learning_rate": 2.6189887473501662e-05,
"loss": 1.0139,
"step": 420000
},
{
"epoch": 0.48,
"learning_rate": 2.61615318394073e-05,
"loss": 1.0128,
"step": 420500
},
{
"epoch": 0.48,
"learning_rate": 2.613323291658113e-05,
"loss": 1.0197,
"step": 421000
},
{
"epoch": 0.48,
"learning_rate": 2.6104877282486767e-05,
"loss": 1.0199,
"step": 421500
},
{
"epoch": 0.48,
"learning_rate": 2.6076521648392406e-05,
"loss": 1.0163,
"step": 422000
},
{
"epoch": 0.48,
"learning_rate": 2.6048166014298048e-05,
"loss": 1.0169,
"step": 422500
},
{
"epoch": 0.48,
"learning_rate": 2.6019810380203686e-05,
"loss": 1.0141,
"step": 423000
},
{
"epoch": 0.48,
"learning_rate": 2.5991511457377514e-05,
"loss": 1.0149,
"step": 423500
},
{
"epoch": 0.48,
"learning_rate": 2.5963155823283152e-05,
"loss": 1.0175,
"step": 424000
},
{
"epoch": 0.48,
"learning_rate": 2.593480018918879e-05,
"loss": 1.0233,
"step": 424500
},
{
"epoch": 0.48,
"learning_rate": 2.5906444555094433e-05,
"loss": 1.016,
"step": 425000
},
{
"epoch": 0.48,
"learning_rate": 2.5878145632268257e-05,
"loss": 1.0118,
"step": 425500
},
{
"epoch": 0.48,
"learning_rate": 2.5849789998173902e-05,
"loss": 1.0155,
"step": 426000
},
{
"epoch": 0.48,
"learning_rate": 2.5821434364079538e-05,
"loss": 1.0154,
"step": 426500
},
{
"epoch": 0.48,
"learning_rate": 2.5793078729985176e-05,
"loss": 1.012,
"step": 427000
},
{
"epoch": 0.48,
"learning_rate": 2.5764723095890815e-05,
"loss": 1.0192,
"step": 427500
},
{
"epoch": 0.49,
"learning_rate": 2.5736367461796457e-05,
"loss": 1.0156,
"step": 428000
},
{
"epoch": 0.49,
"learning_rate": 2.570806853897028e-05,
"loss": 1.0168,
"step": 428500
},
{
"epoch": 0.49,
"learning_rate": 2.5679712904875923e-05,
"loss": 1.0094,
"step": 429000
},
{
"epoch": 0.49,
"learning_rate": 2.565135727078156e-05,
"loss": 1.0231,
"step": 429500
},
{
"epoch": 0.49,
"learning_rate": 2.56230016366872e-05,
"loss": 1.0187,
"step": 430000
},
{
"epoch": 0.49,
"learning_rate": 2.559470271386103e-05,
"loss": 1.0081,
"step": 430500
},
{
"epoch": 0.49,
"learning_rate": 2.5566347079766666e-05,
"loss": 1.0148,
"step": 431000
},
{
"epoch": 0.49,
"learning_rate": 2.553799144567231e-05,
"loss": 1.0189,
"step": 431500
},
{
"epoch": 0.49,
"learning_rate": 2.5509692522846136e-05,
"loss": 1.0128,
"step": 432000
},
{
"epoch": 0.49,
"learning_rate": 2.548133688875177e-05,
"loss": 1.0281,
"step": 432500
},
{
"epoch": 0.49,
"learning_rate": 2.5452981254657416e-05,
"loss": 1.0068,
"step": 433000
},
{
"epoch": 0.49,
"learning_rate": 2.542462562056305e-05,
"loss": 1.0117,
"step": 433500
},
{
"epoch": 0.49,
"learning_rate": 2.539626998646869e-05,
"loss": 1.0162,
"step": 434000
},
{
"epoch": 0.49,
"learning_rate": 2.536797106364252e-05,
"loss": 1.012,
"step": 434500
},
{
"epoch": 0.49,
"learning_rate": 2.533961542954816e-05,
"loss": 1.0134,
"step": 435000
},
{
"epoch": 0.49,
"learning_rate": 2.53112597954538e-05,
"loss": 1.0131,
"step": 435500
},
{
"epoch": 0.49,
"learning_rate": 2.528290416135944e-05,
"loss": 1.0112,
"step": 436000
},
{
"epoch": 0.5,
"learning_rate": 2.5254548527265075e-05,
"loss": 1.0098,
"step": 436500
},
{
"epoch": 0.5,
"learning_rate": 2.522619289317072e-05,
"loss": 1.0226,
"step": 437000
},
{
"epoch": 0.5,
"learning_rate": 2.5197837259076356e-05,
"loss": 1.0001,
"step": 437500
},
{
"epoch": 0.5,
"learning_rate": 2.5169481624981995e-05,
"loss": 1.0252,
"step": 438000
},
{
"epoch": 0.5,
"learning_rate": 2.5141125990887637e-05,
"loss": 1.0117,
"step": 438500
},
{
"epoch": 0.5,
"learning_rate": 2.5112770356793275e-05,
"loss": 1.0151,
"step": 439000
},
{
"epoch": 0.5,
"learning_rate": 2.50844714339671e-05,
"loss": 1.023,
"step": 439500
},
{
"epoch": 0.5,
"learning_rate": 2.505617251114093e-05,
"loss": 1.0151,
"step": 440000
},
{
"epoch": 0.5,
"learning_rate": 2.502781687704657e-05,
"loss": 1.009,
"step": 440500
},
{
"epoch": 0.5,
"learning_rate": 2.4999461242952207e-05,
"loss": 1.0079,
"step": 441000
},
{
"epoch": 0.5,
"learning_rate": 2.497110560885785e-05,
"loss": 1.0206,
"step": 441500
},
{
"epoch": 0.5,
"learning_rate": 2.4942749974763488e-05,
"loss": 1.0178,
"step": 442000
},
{
"epoch": 0.5,
"learning_rate": 2.4914394340669126e-05,
"loss": 1.0174,
"step": 442500
},
{
"epoch": 0.5,
"learning_rate": 2.4886038706574765e-05,
"loss": 1.0103,
"step": 443000
},
{
"epoch": 0.5,
"learning_rate": 2.4857683072480404e-05,
"loss": 1.0227,
"step": 443500
},
{
"epoch": 0.5,
"learning_rate": 2.4829327438386042e-05,
"loss": 1.0151,
"step": 444000
},
{
"epoch": 0.5,
"learning_rate": 2.4800971804291684e-05,
"loss": 1.0095,
"step": 444500
},
{
"epoch": 0.5,
"learning_rate": 2.4772616170197323e-05,
"loss": 1.0053,
"step": 445000
},
{
"epoch": 0.51,
"learning_rate": 2.474426053610296e-05,
"loss": 1.013,
"step": 445500
},
{
"epoch": 0.51,
"learning_rate": 2.471596161327679e-05,
"loss": 1.0146,
"step": 446000
},
{
"epoch": 0.51,
"learning_rate": 2.468760597918243e-05,
"loss": 1.0074,
"step": 446500
},
{
"epoch": 0.51,
"learning_rate": 2.4659250345088066e-05,
"loss": 1.0054,
"step": 447000
},
{
"epoch": 0.51,
"learning_rate": 2.4630894710993708e-05,
"loss": 1.008,
"step": 447500
},
{
"epoch": 0.51,
"learning_rate": 2.4602595788167536e-05,
"loss": 1.0061,
"step": 448000
},
{
"epoch": 0.51,
"learning_rate": 2.4574240154073174e-05,
"loss": 1.0053,
"step": 448500
},
{
"epoch": 0.51,
"learning_rate": 2.4545884519978816e-05,
"loss": 1.0048,
"step": 449000
},
{
"epoch": 0.51,
"learning_rate": 2.451752888588445e-05,
"loss": 1.0169,
"step": 449500
},
{
"epoch": 0.51,
"learning_rate": 2.4489173251790093e-05,
"loss": 1.0218,
"step": 450000
},
{
"epoch": 0.51,
"learning_rate": 2.4460817617695732e-05,
"loss": 0.9961,
"step": 450500
},
{
"epoch": 0.51,
"learning_rate": 2.443246198360137e-05,
"loss": 1.0112,
"step": 451000
},
{
"epoch": 0.51,
"learning_rate": 2.4404163060775198e-05,
"loss": 1.0223,
"step": 451500
},
{
"epoch": 0.51,
"learning_rate": 2.4375864137949026e-05,
"loss": 1.0085,
"step": 452000
},
{
"epoch": 0.51,
"learning_rate": 2.4347508503854664e-05,
"loss": 1.0154,
"step": 452500
},
{
"epoch": 0.51,
"learning_rate": 2.4319152869760306e-05,
"loss": 1.0067,
"step": 453000
},
{
"epoch": 0.51,
"learning_rate": 2.4290797235665945e-05,
"loss": 1.0142,
"step": 453500
},
{
"epoch": 0.51,
"learning_rate": 2.4262441601571583e-05,
"loss": 1.0057,
"step": 454000
},
{
"epoch": 0.52,
"learning_rate": 2.4234085967477225e-05,
"loss": 1.0126,
"step": 454500
},
{
"epoch": 0.52,
"learning_rate": 2.420573033338286e-05,
"loss": 1.0095,
"step": 455000
},
{
"epoch": 0.52,
"learning_rate": 2.4177374699288503e-05,
"loss": 1.0021,
"step": 455500
},
{
"epoch": 0.52,
"learning_rate": 2.4149019065194138e-05,
"loss": 1.0106,
"step": 456000
},
{
"epoch": 0.52,
"learning_rate": 2.412066343109978e-05,
"loss": 1.015,
"step": 456500
},
{
"epoch": 0.52,
"learning_rate": 2.409230779700542e-05,
"loss": 1.0096,
"step": 457000
},
{
"epoch": 0.52,
"learning_rate": 2.4063952162911057e-05,
"loss": 1.0056,
"step": 457500
},
{
"epoch": 0.52,
"learning_rate": 2.4035653240084888e-05,
"loss": 1.0073,
"step": 458000
},
{
"epoch": 0.52,
"learning_rate": 2.4007297605990526e-05,
"loss": 1.0066,
"step": 458500
},
{
"epoch": 0.52,
"learning_rate": 2.3978998683164354e-05,
"loss": 1.0048,
"step": 459000
},
{
"epoch": 0.52,
"learning_rate": 2.3950643049069993e-05,
"loss": 1.0074,
"step": 459500
},
{
"epoch": 0.52,
"learning_rate": 2.3922287414975635e-05,
"loss": 1.001,
"step": 460000
},
{
"epoch": 0.52,
"learning_rate": 2.389393178088127e-05,
"loss": 1.0088,
"step": 460500
},
{
"epoch": 0.52,
"learning_rate": 2.3865576146786912e-05,
"loss": 1.0063,
"step": 461000
},
{
"epoch": 0.52,
"learning_rate": 2.383722051269255e-05,
"loss": 1.0125,
"step": 461500
},
{
"epoch": 0.52,
"learning_rate": 2.380886487859819e-05,
"loss": 1.0034,
"step": 462000
},
{
"epoch": 0.52,
"learning_rate": 2.3780509244503828e-05,
"loss": 1.0051,
"step": 462500
},
{
"epoch": 0.53,
"learning_rate": 2.3752210321677655e-05,
"loss": 1.0107,
"step": 463000
},
{
"epoch": 0.53,
"learning_rate": 2.3723854687583297e-05,
"loss": 1.0042,
"step": 463500
},
{
"epoch": 0.53,
"learning_rate": 2.3695499053488936e-05,
"loss": 1.0103,
"step": 464000
},
{
"epoch": 0.53,
"learning_rate": 2.3667143419394574e-05,
"loss": 1.0007,
"step": 464500
},
{
"epoch": 0.53,
"learning_rate": 2.3638787785300216e-05,
"loss": 1.0033,
"step": 465000
},
{
"epoch": 0.53,
"learning_rate": 2.3610488862474044e-05,
"loss": 1.0069,
"step": 465500
},
{
"epoch": 0.53,
"learning_rate": 2.358213322837968e-05,
"loss": 1.0048,
"step": 466000
},
{
"epoch": 0.53,
"learning_rate": 2.355383430555351e-05,
"loss": 0.9998,
"step": 466500
},
{
"epoch": 0.53,
"learning_rate": 2.352547867145915e-05,
"loss": 0.9987,
"step": 467000
},
{
"epoch": 0.53,
"learning_rate": 2.3497123037364787e-05,
"loss": 0.9919,
"step": 467500
},
{
"epoch": 0.53,
"learning_rate": 2.3468767403270426e-05,
"loss": 1.0182,
"step": 468000
},
{
"epoch": 0.53,
"learning_rate": 2.3440468480444256e-05,
"loss": 0.9936,
"step": 468500
},
{
"epoch": 0.53,
"learning_rate": 2.341211284634989e-05,
"loss": 1.007,
"step": 469000
},
{
"epoch": 0.53,
"learning_rate": 2.3383757212255534e-05,
"loss": 1.0146,
"step": 469500
},
{
"epoch": 0.53,
"learning_rate": 2.3355401578161172e-05,
"loss": 1.0052,
"step": 470000
},
{
"epoch": 0.53,
"learning_rate": 2.332704594406681e-05,
"loss": 1.0066,
"step": 470500
},
{
"epoch": 0.53,
"learning_rate": 2.3298690309972453e-05,
"loss": 0.995,
"step": 471000
},
{
"epoch": 0.53,
"learning_rate": 2.3270334675878088e-05,
"loss": 1.018,
"step": 471500
},
{
"epoch": 0.54,
"learning_rate": 2.324203575305192e-05,
"loss": 1.0077,
"step": 472000
},
{
"epoch": 0.54,
"learning_rate": 2.3213680118957557e-05,
"loss": 1.0112,
"step": 472500
},
{
"epoch": 0.54,
"learning_rate": 2.3185324484863196e-05,
"loss": 1.0096,
"step": 473000
},
{
"epoch": 0.54,
"learning_rate": 2.3157025562037024e-05,
"loss": 1.0105,
"step": 473500
},
{
"epoch": 0.54,
"learning_rate": 2.3128669927942666e-05,
"loss": 0.9991,
"step": 474000
},
{
"epoch": 0.54,
"learning_rate": 2.31003142938483e-05,
"loss": 0.9982,
"step": 474500
},
{
"epoch": 0.54,
"learning_rate": 2.3071958659753943e-05,
"loss": 1.0006,
"step": 475000
},
{
"epoch": 0.54,
"learning_rate": 2.304360302565958e-05,
"loss": 1.0105,
"step": 475500
},
{
"epoch": 0.54,
"learning_rate": 2.301524739156522e-05,
"loss": 1.0055,
"step": 476000
},
{
"epoch": 0.54,
"learning_rate": 2.2986891757470862e-05,
"loss": 0.9948,
"step": 476500
},
{
"epoch": 0.54,
"learning_rate": 2.29585361233765e-05,
"loss": 1.0058,
"step": 477000
},
{
"epoch": 0.54,
"learning_rate": 2.293018048928214e-05,
"loss": 1.0053,
"step": 477500
},
{
"epoch": 0.54,
"learning_rate": 2.2901824855187778e-05,
"loss": 0.9997,
"step": 478000
},
{
"epoch": 0.54,
"learning_rate": 2.2873525932361605e-05,
"loss": 1.001,
"step": 478500
},
{
"epoch": 0.54,
"learning_rate": 2.2845170298267247e-05,
"loss": 1.0083,
"step": 479000
},
{
"epoch": 0.54,
"learning_rate": 2.2816814664172882e-05,
"loss": 0.9881,
"step": 479500
},
{
"epoch": 0.54,
"learning_rate": 2.2788459030078524e-05,
"loss": 1.009,
"step": 480000
},
{
"epoch": 0.54,
"learning_rate": 2.2760103395984163e-05,
"loss": 1.0054,
"step": 480500
},
{
"epoch": 0.55,
"learning_rate": 2.27317477618898e-05,
"loss": 0.9985,
"step": 481000
},
{
"epoch": 0.55,
"learning_rate": 2.2703392127795444e-05,
"loss": 0.9986,
"step": 481500
},
{
"epoch": 0.55,
"learning_rate": 2.267503649370108e-05,
"loss": 0.9972,
"step": 482000
},
{
"epoch": 0.55,
"learning_rate": 2.264668085960672e-05,
"loss": 1.0016,
"step": 482500
},
{
"epoch": 0.55,
"learning_rate": 2.261838193678055e-05,
"loss": 1.0047,
"step": 483000
},
{
"epoch": 0.55,
"learning_rate": 2.2590026302686187e-05,
"loss": 0.9989,
"step": 483500
},
{
"epoch": 0.55,
"learning_rate": 2.2561784091128203e-05,
"loss": 1.0026,
"step": 484000
},
{
"epoch": 0.55,
"learning_rate": 2.2533428457033842e-05,
"loss": 0.9959,
"step": 484500
},
{
"epoch": 0.55,
"learning_rate": 2.2505072822939484e-05,
"loss": 0.9968,
"step": 485000
},
{
"epoch": 0.55,
"learning_rate": 2.2476717188845122e-05,
"loss": 1.0093,
"step": 485500
},
{
"epoch": 0.55,
"learning_rate": 2.244836155475076e-05,
"loss": 1.0019,
"step": 486000
},
{
"epoch": 0.55,
"learning_rate": 2.24200059206564e-05,
"loss": 1.0069,
"step": 486500
},
{
"epoch": 0.55,
"learning_rate": 2.2391650286562038e-05,
"loss": 1.0065,
"step": 487000
},
{
"epoch": 0.55,
"learning_rate": 2.236335136373587e-05,
"loss": 0.9968,
"step": 487500
},
{
"epoch": 0.55,
"learning_rate": 2.2334995729641504e-05,
"loss": 0.9999,
"step": 488000
},
{
"epoch": 0.55,
"learning_rate": 2.2306640095547146e-05,
"loss": 1.0083,
"step": 488500
},
{
"epoch": 0.55,
"learning_rate": 2.2278284461452785e-05,
"loss": 1.0068,
"step": 489000
},
{
"epoch": 0.56,
"learning_rate": 2.2249928827358424e-05,
"loss": 0.9943,
"step": 489500
},
{
"epoch": 0.56,
"learning_rate": 2.2221573193264066e-05,
"loss": 1.0,
"step": 490000
},
{
"epoch": 0.56,
"learning_rate": 2.2193274270437893e-05,
"loss": 1.0014,
"step": 490500
},
{
"epoch": 0.56,
"learning_rate": 2.216491863634353e-05,
"loss": 1.0007,
"step": 491000
},
{
"epoch": 0.56,
"learning_rate": 2.213656300224917e-05,
"loss": 1.0129,
"step": 491500
},
{
"epoch": 0.56,
"learning_rate": 2.210820736815481e-05,
"loss": 1.0107,
"step": 492000
},
{
"epoch": 0.56,
"learning_rate": 2.2079851734060447e-05,
"loss": 0.9897,
"step": 492500
},
{
"epoch": 0.56,
"learning_rate": 2.2051496099966086e-05,
"loss": 1.0077,
"step": 493000
},
{
"epoch": 0.56,
"learning_rate": 2.2023140465871728e-05,
"loss": 0.9976,
"step": 493500
},
{
"epoch": 0.56,
"learning_rate": 2.1994784831777367e-05,
"loss": 1.0056,
"step": 494000
},
{
"epoch": 0.56,
"learning_rate": 2.1966429197683005e-05,
"loss": 1.0041,
"step": 494500
},
{
"epoch": 0.56,
"learning_rate": 2.1938130274856833e-05,
"loss": 1.0069,
"step": 495000
},
{
"epoch": 0.56,
"learning_rate": 2.190983135203066e-05,
"loss": 1.0037,
"step": 495500
},
{
"epoch": 0.56,
"learning_rate": 2.1881475717936302e-05,
"loss": 0.9918,
"step": 496000
},
{
"epoch": 0.56,
"learning_rate": 2.185312008384194e-05,
"loss": 1.0019,
"step": 496500
},
{
"epoch": 0.56,
"learning_rate": 2.182476444974758e-05,
"loss": 0.9973,
"step": 497000
},
{
"epoch": 0.56,
"learning_rate": 2.1796408815653218e-05,
"loss": 0.9979,
"step": 497500
},
{
"epoch": 0.56,
"learning_rate": 2.176805318155886e-05,
"loss": 1.01,
"step": 498000
},
{
"epoch": 0.57,
"learning_rate": 2.1739697547464495e-05,
"loss": 1.0116,
"step": 498500
},
{
"epoch": 0.57,
"learning_rate": 2.1711341913370137e-05,
"loss": 0.9946,
"step": 499000
},
{
"epoch": 0.57,
"learning_rate": 2.1683042990543965e-05,
"loss": 0.9969,
"step": 499500
},
{
"epoch": 0.57,
"learning_rate": 2.1654744067717792e-05,
"loss": 0.9998,
"step": 500000
},
{
"epoch": 0.57,
"learning_rate": 2.162638843362343e-05,
"loss": 1.0017,
"step": 500500
},
{
"epoch": 0.57,
"learning_rate": 2.1598089510797258e-05,
"loss": 0.9965,
"step": 501000
},
{
"epoch": 0.57,
"learning_rate": 2.15697338767029e-05,
"loss": 0.9956,
"step": 501500
},
{
"epoch": 0.57,
"learning_rate": 2.1541378242608535e-05,
"loss": 1.0097,
"step": 502000
},
{
"epoch": 0.57,
"learning_rate": 2.1513022608514177e-05,
"loss": 1.0106,
"step": 502500
},
{
"epoch": 0.57,
"learning_rate": 2.1484666974419816e-05,
"loss": 0.9964,
"step": 503000
},
{
"epoch": 0.57,
"learning_rate": 2.1456311340325455e-05,
"loss": 1.0046,
"step": 503500
},
{
"epoch": 0.57,
"learning_rate": 2.1427955706231097e-05,
"loss": 0.9957,
"step": 504000
},
{
"epoch": 0.57,
"learning_rate": 2.1399600072136732e-05,
"loss": 1.0013,
"step": 504500
},
{
"epoch": 0.57,
"learning_rate": 2.1371244438042374e-05,
"loss": 0.9992,
"step": 505000
},
{
"epoch": 0.57,
"learning_rate": 2.1342888803948012e-05,
"loss": 1.0072,
"step": 505500
},
{
"epoch": 0.57,
"learning_rate": 2.131453316985365e-05,
"loss": 1.0009,
"step": 506000
},
{
"epoch": 0.57,
"learning_rate": 2.1286177535759293e-05,
"loss": 0.9977,
"step": 506500
},
{
"epoch": 0.58,
"learning_rate": 2.125782190166493e-05,
"loss": 0.9949,
"step": 507000
},
{
"epoch": 0.58,
"learning_rate": 2.122946626757057e-05,
"loss": 0.9974,
"step": 507500
},
{
"epoch": 0.58,
"learning_rate": 2.1201167344744398e-05,
"loss": 1.0007,
"step": 508000
},
{
"epoch": 0.58,
"learning_rate": 2.1172811710650036e-05,
"loss": 0.9942,
"step": 508500
},
{
"epoch": 0.58,
"learning_rate": 2.1144456076555678e-05,
"loss": 1.005,
"step": 509000
},
{
"epoch": 0.58,
"learning_rate": 2.1116100442461313e-05,
"loss": 0.9941,
"step": 509500
},
{
"epoch": 0.58,
"learning_rate": 2.1087801519635144e-05,
"loss": 1.0026,
"step": 510000
},
{
"epoch": 0.58,
"learning_rate": 2.1059445885540783e-05,
"loss": 0.9952,
"step": 510500
},
{
"epoch": 0.58,
"learning_rate": 2.103109025144642e-05,
"loss": 0.9996,
"step": 511000
},
{
"epoch": 0.58,
"learning_rate": 2.100273461735206e-05,
"loss": 1.0008,
"step": 511500
},
{
"epoch": 0.58,
"learning_rate": 2.09743789832577e-05,
"loss": 0.9932,
"step": 512000
},
{
"epoch": 0.58,
"learning_rate": 2.094602334916334e-05,
"loss": 0.9992,
"step": 512500
},
{
"epoch": 0.58,
"learning_rate": 2.0917724426337168e-05,
"loss": 1.0031,
"step": 513000
},
{
"epoch": 0.58,
"learning_rate": 2.0889368792242807e-05,
"loss": 1.0025,
"step": 513500
},
{
"epoch": 0.58,
"learning_rate": 2.0861013158148445e-05,
"loss": 1.0033,
"step": 514000
},
{
"epoch": 0.58,
"learning_rate": 2.0832657524054087e-05,
"loss": 0.9991,
"step": 514500
},
{
"epoch": 0.58,
"learning_rate": 2.0804301889959723e-05,
"loss": 0.9925,
"step": 515000
},
{
"epoch": 0.58,
"learning_rate": 2.0775946255865365e-05,
"loss": 0.992,
"step": 515500
},
{
"epoch": 0.59,
"learning_rate": 2.074770404430738e-05,
"loss": 0.9922,
"step": 516000
},
{
"epoch": 0.59,
"learning_rate": 2.071934841021302e-05,
"loss": 1.0043,
"step": 516500
},
{
"epoch": 0.59,
"learning_rate": 2.0690992776118658e-05,
"loss": 0.9967,
"step": 517000
},
{
"epoch": 0.59,
"learning_rate": 2.06626371420243e-05,
"loss": 0.9938,
"step": 517500
},
{
"epoch": 0.59,
"learning_rate": 2.0634281507929935e-05,
"loss": 1.0054,
"step": 518000
},
{
"epoch": 0.59,
"learning_rate": 2.0605925873835577e-05,
"loss": 0.9903,
"step": 518500
},
{
"epoch": 0.59,
"learning_rate": 2.0577570239741216e-05,
"loss": 1.0006,
"step": 519000
},
{
"epoch": 0.59,
"learning_rate": 2.0549214605646855e-05,
"loss": 0.991,
"step": 519500
},
{
"epoch": 0.59,
"learning_rate": 2.0520858971552497e-05,
"loss": 0.9868,
"step": 520000
},
{
"epoch": 0.59,
"learning_rate": 2.0492503337458132e-05,
"loss": 0.9972,
"step": 520500
},
{
"epoch": 0.59,
"learning_rate": 2.0464147703363774e-05,
"loss": 0.9961,
"step": 521000
},
{
"epoch": 0.59,
"learning_rate": 2.0435792069269412e-05,
"loss": 0.9924,
"step": 521500
},
{
"epoch": 0.59,
"learning_rate": 2.040743643517505e-05,
"loss": 1.0049,
"step": 522000
},
{
"epoch": 0.59,
"learning_rate": 2.037908080108069e-05,
"loss": 0.994,
"step": 522500
},
{
"epoch": 0.59,
"learning_rate": 2.035072516698633e-05,
"loss": 0.9881,
"step": 523000
},
{
"epoch": 0.59,
"learning_rate": 2.032242624416016e-05,
"loss": 0.9917,
"step": 523500
},
{
"epoch": 0.59,
"learning_rate": 2.0294070610065798e-05,
"loss": 0.9919,
"step": 524000
},
{
"epoch": 0.59,
"learning_rate": 2.0265828398507814e-05,
"loss": 0.9902,
"step": 524500
},
{
"epoch": 0.6,
"learning_rate": 2.0237472764413453e-05,
"loss": 0.9817,
"step": 525000
},
{
"epoch": 0.6,
"learning_rate": 2.020911713031909e-05,
"loss": 0.9926,
"step": 525500
},
{
"epoch": 0.6,
"learning_rate": 2.0180761496224733e-05,
"loss": 0.9852,
"step": 526000
},
{
"epoch": 0.6,
"learning_rate": 2.0152405862130372e-05,
"loss": 1.001,
"step": 526500
},
{
"epoch": 0.6,
"learning_rate": 2.012405022803601e-05,
"loss": 1.0008,
"step": 527000
},
{
"epoch": 0.6,
"learning_rate": 2.009569459394165e-05,
"loss": 0.9931,
"step": 527500
},
{
"epoch": 0.6,
"learning_rate": 2.006733895984729e-05,
"loss": 0.991,
"step": 528000
},
{
"epoch": 0.6,
"learning_rate": 2.0038983325752926e-05,
"loss": 0.989,
"step": 528500
},
{
"epoch": 0.6,
"learning_rate": 2.0010627691658568e-05,
"loss": 0.9918,
"step": 529000
},
{
"epoch": 0.6,
"learning_rate": 1.9982272057564207e-05,
"loss": 0.9946,
"step": 529500
},
{
"epoch": 0.6,
"learning_rate": 1.9953973134738034e-05,
"loss": 0.9953,
"step": 530000
},
{
"epoch": 0.6,
"learning_rate": 1.9925617500643673e-05,
"loss": 0.9923,
"step": 530500
},
{
"epoch": 0.6,
"learning_rate": 1.9897318577817504e-05,
"loss": 0.995,
"step": 531000
},
{
"epoch": 0.6,
"learning_rate": 1.986896294372314e-05,
"loss": 0.9963,
"step": 531500
},
{
"epoch": 0.6,
"learning_rate": 1.9840664020896966e-05,
"loss": 0.9852,
"step": 532000
},
{
"epoch": 0.6,
"learning_rate": 1.981230838680261e-05,
"loss": 0.9863,
"step": 532500
},
{
"epoch": 0.6,
"learning_rate": 1.9783952752708247e-05,
"loss": 1.0024,
"step": 533000
},
{
"epoch": 0.61,
"learning_rate": 1.9755597118613886e-05,
"loss": 0.9835,
"step": 533500
},
{
"epoch": 0.61,
"learning_rate": 1.9727241484519528e-05,
"loss": 0.9939,
"step": 534000
},
{
"epoch": 0.61,
"learning_rate": 1.9698885850425166e-05,
"loss": 0.9932,
"step": 534500
},
{
"epoch": 0.61,
"learning_rate": 1.9670530216330805e-05,
"loss": 0.9922,
"step": 535000
},
{
"epoch": 0.61,
"learning_rate": 1.9642174582236443e-05,
"loss": 0.9876,
"step": 535500
},
{
"epoch": 0.61,
"learning_rate": 1.9613818948142082e-05,
"loss": 0.9955,
"step": 536000
},
{
"epoch": 0.61,
"learning_rate": 1.958546331404772e-05,
"loss": 0.9902,
"step": 536500
},
{
"epoch": 0.61,
"learning_rate": 1.9557107679953363e-05,
"loss": 0.9865,
"step": 537000
},
{
"epoch": 0.61,
"learning_rate": 1.9528752045859e-05,
"loss": 0.9973,
"step": 537500
},
{
"epoch": 0.61,
"learning_rate": 1.950045312303283e-05,
"loss": 0.9892,
"step": 538000
},
{
"epoch": 0.61,
"learning_rate": 1.9472097488938467e-05,
"loss": 0.9803,
"step": 538500
},
{
"epoch": 0.61,
"learning_rate": 1.944374185484411e-05,
"loss": 1.0036,
"step": 539000
},
{
"epoch": 0.61,
"learning_rate": 1.9415386220749744e-05,
"loss": 0.9909,
"step": 539500
},
{
"epoch": 0.61,
"learning_rate": 1.9387030586655386e-05,
"loss": 0.9942,
"step": 540000
},
{
"epoch": 0.61,
"learning_rate": 1.9358674952561025e-05,
"loss": 0.9936,
"step": 540500
},
{
"epoch": 0.61,
"learning_rate": 1.9330376029734853e-05,
"loss": 0.9986,
"step": 541000
},
{
"epoch": 0.61,
"learning_rate": 1.930202039564049e-05,
"loss": 0.9943,
"step": 541500
},
{
"epoch": 0.61,
"learning_rate": 1.927366476154613e-05,
"loss": 0.9922,
"step": 542000
},
{
"epoch": 0.62,
"learning_rate": 1.9245309127451772e-05,
"loss": 0.9821,
"step": 542500
},
{
"epoch": 0.62,
"learning_rate": 1.921695349335741e-05,
"loss": 0.9926,
"step": 543000
},
{
"epoch": 0.62,
"learning_rate": 1.918859785926305e-05,
"loss": 0.998,
"step": 543500
},
{
"epoch": 0.62,
"learning_rate": 1.9160298936436876e-05,
"loss": 0.9919,
"step": 544000
},
{
"epoch": 0.62,
"learning_rate": 1.913194330234252e-05,
"loss": 0.9886,
"step": 544500
},
{
"epoch": 0.62,
"learning_rate": 1.9103587668248154e-05,
"loss": 0.9965,
"step": 545000
},
{
"epoch": 0.62,
"learning_rate": 1.9075345456690173e-05,
"loss": 0.9941,
"step": 545500
},
{
"epoch": 0.62,
"learning_rate": 1.9046989822595812e-05,
"loss": 0.9832,
"step": 546000
},
{
"epoch": 0.62,
"learning_rate": 1.9018634188501454e-05,
"loss": 0.9805,
"step": 546500
},
{
"epoch": 0.62,
"learning_rate": 1.899027855440709e-05,
"loss": 0.9913,
"step": 547000
},
{
"epoch": 0.62,
"learning_rate": 1.896192292031273e-05,
"loss": 0.9906,
"step": 547500
},
{
"epoch": 0.62,
"learning_rate": 1.8933567286218366e-05,
"loss": 0.9859,
"step": 548000
},
{
"epoch": 0.62,
"learning_rate": 1.890521165212401e-05,
"loss": 0.9951,
"step": 548500
},
{
"epoch": 0.62,
"learning_rate": 1.8876856018029647e-05,
"loss": 0.9988,
"step": 549000
},
{
"epoch": 0.62,
"learning_rate": 1.8848500383935286e-05,
"loss": 0.9934,
"step": 549500
},
{
"epoch": 0.62,
"learning_rate": 1.8820144749840928e-05,
"loss": 1.0011,
"step": 550000
},
{
"epoch": 0.62,
"learning_rate": 1.8791845827014755e-05,
"loss": 0.9946,
"step": 550500
},
{
"epoch": 0.62,
"learning_rate": 1.8763490192920394e-05,
"loss": 0.9864,
"step": 551000
},
{
"epoch": 0.63,
"learning_rate": 1.8735134558826032e-05,
"loss": 0.9965,
"step": 551500
},
{
"epoch": 0.63,
"learning_rate": 1.8706835635999863e-05,
"loss": 0.9811,
"step": 552000
},
{
"epoch": 0.63,
"learning_rate": 1.8678480001905498e-05,
"loss": 0.9851,
"step": 552500
},
{
"epoch": 0.63,
"learning_rate": 1.865012436781114e-05,
"loss": 0.9932,
"step": 553000
},
{
"epoch": 0.63,
"learning_rate": 1.8621768733716776e-05,
"loss": 0.9888,
"step": 553500
},
{
"epoch": 0.63,
"learning_rate": 1.8593413099622418e-05,
"loss": 0.9839,
"step": 554000
},
{
"epoch": 0.63,
"learning_rate": 1.8565114176796245e-05,
"loss": 0.9882,
"step": 554500
},
{
"epoch": 0.63,
"learning_rate": 1.8536758542701884e-05,
"loss": 0.9815,
"step": 555000
},
{
"epoch": 0.63,
"learning_rate": 1.8508402908607526e-05,
"loss": 0.9839,
"step": 555500
},
{
"epoch": 0.63,
"learning_rate": 1.8480047274513164e-05,
"loss": 0.9842,
"step": 556000
},
{
"epoch": 0.63,
"learning_rate": 1.8451691640418803e-05,
"loss": 0.9871,
"step": 556500
},
{
"epoch": 0.63,
"learning_rate": 1.842333600632444e-05,
"loss": 0.9908,
"step": 557000
},
{
"epoch": 0.63,
"learning_rate": 1.839498037223008e-05,
"loss": 0.9846,
"step": 557500
},
{
"epoch": 0.63,
"learning_rate": 1.8366624738135722e-05,
"loss": 0.9952,
"step": 558000
},
{
"epoch": 0.63,
"learning_rate": 1.8338269104041357e-05,
"loss": 0.997,
"step": 558500
},
{
"epoch": 0.63,
"learning_rate": 1.8309913469947e-05,
"loss": 0.9956,
"step": 559000
},
{
"epoch": 0.63,
"learning_rate": 1.8281557835852638e-05,
"loss": 0.9836,
"step": 559500
},
{
"epoch": 0.64,
"learning_rate": 1.8253202201758276e-05,
"loss": 0.9878,
"step": 560000
},
{
"epoch": 0.64,
"learning_rate": 1.822484656766392e-05,
"loss": 0.9938,
"step": 560500
},
{
"epoch": 0.64,
"learning_rate": 1.8196547644837746e-05,
"loss": 0.983,
"step": 561000
},
{
"epoch": 0.64,
"learning_rate": 1.8168192010743384e-05,
"loss": 0.9908,
"step": 561500
},
{
"epoch": 0.64,
"learning_rate": 1.8139836376649023e-05,
"loss": 0.9919,
"step": 562000
},
{
"epoch": 0.64,
"learning_rate": 1.8111480742554662e-05,
"loss": 0.9829,
"step": 562500
},
{
"epoch": 0.64,
"learning_rate": 1.8083125108460304e-05,
"loss": 0.9736,
"step": 563000
},
{
"epoch": 0.64,
"learning_rate": 1.805476947436594e-05,
"loss": 0.981,
"step": 563500
},
{
"epoch": 0.64,
"learning_rate": 1.802641384027158e-05,
"loss": 0.9936,
"step": 564000
},
{
"epoch": 0.64,
"learning_rate": 1.799805820617722e-05,
"loss": 0.99,
"step": 564500
},
{
"epoch": 0.64,
"learning_rate": 1.7969702572082858e-05,
"loss": 0.9939,
"step": 565000
},
{
"epoch": 0.64,
"learning_rate": 1.7941403649256686e-05,
"loss": 0.9803,
"step": 565500
},
{
"epoch": 0.64,
"learning_rate": 1.7913048015162328e-05,
"loss": 0.9912,
"step": 566000
},
{
"epoch": 0.64,
"learning_rate": 1.788474909233615e-05,
"loss": 0.992,
"step": 566500
},
{
"epoch": 0.64,
"learning_rate": 1.7856393458241794e-05,
"loss": 0.9903,
"step": 567000
},
{
"epoch": 0.64,
"learning_rate": 1.7828037824147432e-05,
"loss": 0.976,
"step": 567500
},
{
"epoch": 0.64,
"learning_rate": 1.779968219005307e-05,
"loss": 0.9914,
"step": 568000
},
{
"epoch": 0.64,
"learning_rate": 1.7771326555958713e-05,
"loss": 0.9889,
"step": 568500
},
{
"epoch": 0.65,
"learning_rate": 1.7742970921864348e-05,
"loss": 0.9841,
"step": 569000
},
{
"epoch": 0.65,
"learning_rate": 1.7714671999038176e-05,
"loss": 0.9846,
"step": 569500
},
{
"epoch": 0.65,
"learning_rate": 1.7686316364943818e-05,
"loss": 0.9827,
"step": 570000
},
{
"epoch": 0.65,
"learning_rate": 1.7657960730849456e-05,
"loss": 0.9951,
"step": 570500
},
{
"epoch": 0.65,
"learning_rate": 1.7629605096755095e-05,
"loss": 0.983,
"step": 571000
},
{
"epoch": 0.65,
"learning_rate": 1.7601249462660733e-05,
"loss": 0.981,
"step": 571500
},
{
"epoch": 0.65,
"learning_rate": 1.757295053983456e-05,
"loss": 0.9897,
"step": 572000
},
{
"epoch": 0.65,
"learning_rate": 1.7544594905740203e-05,
"loss": 0.977,
"step": 572500
},
{
"epoch": 0.65,
"learning_rate": 1.751629598291403e-05,
"loss": 0.9825,
"step": 573000
},
{
"epoch": 0.65,
"learning_rate": 1.748794034881967e-05,
"loss": 0.9795,
"step": 573500
},
{
"epoch": 0.65,
"learning_rate": 1.7459584714725307e-05,
"loss": 0.9859,
"step": 574000
},
{
"epoch": 0.65,
"learning_rate": 1.7431285791899135e-05,
"loss": 0.9837,
"step": 574500
},
{
"epoch": 0.65,
"learning_rate": 1.7402930157804777e-05,
"loss": 0.9859,
"step": 575000
},
{
"epoch": 0.65,
"learning_rate": 1.7374574523710415e-05,
"loss": 0.9728,
"step": 575500
},
{
"epoch": 0.65,
"learning_rate": 1.7346218889616054e-05,
"loss": 0.9748,
"step": 576000
},
{
"epoch": 0.65,
"learning_rate": 1.7317863255521693e-05,
"loss": 0.9908,
"step": 576500
},
{
"epoch": 0.65,
"learning_rate": 1.7289507621427335e-05,
"loss": 0.9836,
"step": 577000
},
{
"epoch": 0.66,
"learning_rate": 1.726115198733297e-05,
"loss": 0.9849,
"step": 577500
},
{
"epoch": 0.66,
"learning_rate": 1.7232796353238612e-05,
"loss": 0.981,
"step": 578000
},
{
"epoch": 0.66,
"learning_rate": 1.720444071914425e-05,
"loss": 0.9864,
"step": 578500
},
{
"epoch": 0.66,
"learning_rate": 1.7176141796318078e-05,
"loss": 0.9832,
"step": 579000
},
{
"epoch": 0.66,
"learning_rate": 1.7147786162223717e-05,
"loss": 0.9839,
"step": 579500
},
{
"epoch": 0.66,
"learning_rate": 1.711943052812936e-05,
"loss": 0.9855,
"step": 580000
},
{
"epoch": 0.66,
"learning_rate": 1.7091074894034997e-05,
"loss": 0.9783,
"step": 580500
},
{
"epoch": 0.66,
"learning_rate": 1.7062719259940636e-05,
"loss": 0.9795,
"step": 581000
},
{
"epoch": 0.66,
"learning_rate": 1.7034363625846274e-05,
"loss": 0.9807,
"step": 581500
},
{
"epoch": 0.66,
"learning_rate": 1.7006007991751913e-05,
"loss": 0.9904,
"step": 582000
},
{
"epoch": 0.66,
"learning_rate": 1.697765235765755e-05,
"loss": 0.9851,
"step": 582500
},
{
"epoch": 0.66,
"learning_rate": 1.6949296723563194e-05,
"loss": 0.9822,
"step": 583000
},
{
"epoch": 0.66,
"learning_rate": 1.6920941089468832e-05,
"loss": 0.9958,
"step": 583500
},
{
"epoch": 0.66,
"learning_rate": 1.689258545537447e-05,
"loss": 0.9844,
"step": 584000
},
{
"epoch": 0.66,
"learning_rate": 1.6864229821280113e-05,
"loss": 0.9854,
"step": 584500
},
{
"epoch": 0.66,
"learning_rate": 1.6835874187185748e-05,
"loss": 0.9834,
"step": 585000
},
{
"epoch": 0.66,
"learning_rate": 1.6807575264359576e-05,
"loss": 0.9897,
"step": 585500
},
{
"epoch": 0.66,
"learning_rate": 1.6779219630265218e-05,
"loss": 0.9865,
"step": 586000
},
{
"epoch": 0.67,
"learning_rate": 1.6750863996170856e-05,
"loss": 0.9856,
"step": 586500
},
{
"epoch": 0.67,
"learning_rate": 1.6722508362076495e-05,
"loss": 0.9778,
"step": 587000
},
{
"epoch": 0.67,
"learning_rate": 1.6694209439250326e-05,
"loss": 0.9848,
"step": 587500
},
{
"epoch": 0.67,
"learning_rate": 1.666585380515596e-05,
"loss": 0.984,
"step": 588000
},
{
"epoch": 0.67,
"learning_rate": 1.6637554882329788e-05,
"loss": 0.9857,
"step": 588500
},
{
"epoch": 0.67,
"learning_rate": 1.660919924823543e-05,
"loss": 0.9821,
"step": 589000
},
{
"epoch": 0.67,
"learning_rate": 1.658084361414107e-05,
"loss": 0.9858,
"step": 589500
},
{
"epoch": 0.67,
"learning_rate": 1.6552487980046707e-05,
"loss": 0.9867,
"step": 590000
},
{
"epoch": 0.67,
"learning_rate": 1.652413234595235e-05,
"loss": 0.9771,
"step": 590500
},
{
"epoch": 0.67,
"learning_rate": 1.6495776711857985e-05,
"loss": 0.9785,
"step": 591000
},
{
"epoch": 0.67,
"learning_rate": 1.6467421077763627e-05,
"loss": 0.9895,
"step": 591500
},
{
"epoch": 0.67,
"learning_rate": 1.6439065443669265e-05,
"loss": 0.9833,
"step": 592000
},
{
"epoch": 0.67,
"learning_rate": 1.6410709809574904e-05,
"loss": 0.993,
"step": 592500
},
{
"epoch": 0.67,
"learning_rate": 1.6382410886748735e-05,
"loss": 0.9815,
"step": 593000
},
{
"epoch": 0.67,
"learning_rate": 1.635405525265437e-05,
"loss": 0.9872,
"step": 593500
},
{
"epoch": 0.67,
"learning_rate": 1.6325699618560012e-05,
"loss": 0.979,
"step": 594000
},
{
"epoch": 0.67,
"learning_rate": 1.629740069573384e-05,
"loss": 0.9726,
"step": 594500
},
{
"epoch": 0.67,
"learning_rate": 1.6269045061639478e-05,
"loss": 0.9865,
"step": 595000
},
{
"epoch": 0.68,
"learning_rate": 1.6240689427545117e-05,
"loss": 0.972,
"step": 595500
},
{
"epoch": 0.68,
"learning_rate": 1.6212333793450755e-05,
"loss": 0.991,
"step": 596000
},
{
"epoch": 0.68,
"learning_rate": 1.6183978159356397e-05,
"loss": 0.9768,
"step": 596500
},
{
"epoch": 0.68,
"learning_rate": 1.6155679236530225e-05,
"loss": 0.982,
"step": 597000
},
{
"epoch": 0.68,
"learning_rate": 1.6127323602435863e-05,
"loss": 0.9815,
"step": 597500
},
{
"epoch": 0.68,
"learning_rate": 1.6098967968341502e-05,
"loss": 0.9774,
"step": 598000
},
{
"epoch": 0.68,
"learning_rate": 1.6070612334247144e-05,
"loss": 0.9892,
"step": 598500
},
{
"epoch": 0.68,
"learning_rate": 1.604225670015278e-05,
"loss": 0.9855,
"step": 599000
},
{
"epoch": 0.68,
"learning_rate": 1.6013957777326607e-05,
"loss": 0.9809,
"step": 599500
},
{
"epoch": 0.68,
"learning_rate": 1.598560214323225e-05,
"loss": 0.982,
"step": 600000
},
{
"epoch": 0.68,
"learning_rate": 1.5957246509137887e-05,
"loss": 0.9747,
"step": 600500
},
{
"epoch": 0.68,
"learning_rate": 1.5928890875043526e-05,
"loss": 0.9738,
"step": 601000
},
{
"epoch": 0.68,
"learning_rate": 1.5900535240949164e-05,
"loss": 0.9766,
"step": 601500
},
{
"epoch": 0.68,
"learning_rate": 1.5872179606854806e-05,
"loss": 0.973,
"step": 602000
},
{
"epoch": 0.68,
"learning_rate": 1.5843823972760445e-05,
"loss": 0.9879,
"step": 602500
},
{
"epoch": 0.68,
"learning_rate": 1.5815468338666084e-05,
"loss": 0.986,
"step": 603000
},
{
"epoch": 0.68,
"learning_rate": 1.5787112704571726e-05,
"loss": 0.9771,
"step": 603500
},
{
"epoch": 0.69,
"learning_rate": 1.575875707047736e-05,
"loss": 0.9872,
"step": 604000
},
{
"epoch": 0.69,
"learning_rate": 1.5730458147651188e-05,
"loss": 0.99,
"step": 604500
},
{
"epoch": 0.69,
"learning_rate": 1.570210251355683e-05,
"loss": 0.973,
"step": 605000
},
{
"epoch": 0.69,
"learning_rate": 1.567374687946247e-05,
"loss": 0.9817,
"step": 605500
},
{
"epoch": 0.69,
"learning_rate": 1.5645391245368107e-05,
"loss": 0.9812,
"step": 606000
},
{
"epoch": 0.69,
"learning_rate": 1.5617035611273746e-05,
"loss": 0.9915,
"step": 606500
},
{
"epoch": 0.69,
"learning_rate": 1.5588679977179385e-05,
"loss": 0.9732,
"step": 607000
},
{
"epoch": 0.69,
"learning_rate": 1.5560324343085027e-05,
"loss": 0.9905,
"step": 607500
},
{
"epoch": 0.69,
"learning_rate": 1.5531968708990665e-05,
"loss": 0.9749,
"step": 608000
},
{
"epoch": 0.69,
"learning_rate": 1.5503613074896304e-05,
"loss": 0.9836,
"step": 608500
},
{
"epoch": 0.69,
"learning_rate": 1.5475314152070135e-05,
"loss": 0.9862,
"step": 609000
},
{
"epoch": 0.69,
"learning_rate": 1.544695851797577e-05,
"loss": 0.9842,
"step": 609500
},
{
"epoch": 0.69,
"learning_rate": 1.5418659595149597e-05,
"loss": 0.9814,
"step": 610000
},
{
"epoch": 0.69,
"learning_rate": 1.539030396105524e-05,
"loss": 0.9939,
"step": 610500
},
{
"epoch": 0.69,
"learning_rate": 1.5361948326960878e-05,
"loss": 0.9725,
"step": 611000
},
{
"epoch": 0.69,
"learning_rate": 1.5333592692866517e-05,
"loss": 0.9773,
"step": 611500
},
{
"epoch": 0.69,
"learning_rate": 1.5305237058772155e-05,
"loss": 0.9796,
"step": 612000
},
{
"epoch": 0.69,
"learning_rate": 1.5276881424677797e-05,
"loss": 0.9804,
"step": 612500
},
{
"epoch": 0.7,
"learning_rate": 1.5248525790583434e-05,
"loss": 0.9725,
"step": 613000
},
{
"epoch": 0.7,
"learning_rate": 1.5220170156489074e-05,
"loss": 0.972,
"step": 613500
},
{
"epoch": 0.7,
"learning_rate": 1.5191871233662902e-05,
"loss": 0.9727,
"step": 614000
},
{
"epoch": 0.7,
"learning_rate": 1.5163515599568542e-05,
"loss": 0.9777,
"step": 614500
},
{
"epoch": 0.7,
"learning_rate": 1.5135159965474179e-05,
"loss": 0.9731,
"step": 615000
},
{
"epoch": 0.7,
"learning_rate": 1.510680433137982e-05,
"loss": 0.9727,
"step": 615500
},
{
"epoch": 0.7,
"learning_rate": 1.507844869728546e-05,
"loss": 0.9839,
"step": 616000
},
{
"epoch": 0.7,
"learning_rate": 1.5050093063191098e-05,
"loss": 0.9794,
"step": 616500
},
{
"epoch": 0.7,
"learning_rate": 1.5021737429096739e-05,
"loss": 0.973,
"step": 617000
},
{
"epoch": 0.7,
"learning_rate": 1.4993381795002376e-05,
"loss": 0.9803,
"step": 617500
},
{
"epoch": 0.7,
"learning_rate": 1.4965026160908016e-05,
"loss": 0.9746,
"step": 618000
},
{
"epoch": 0.7,
"learning_rate": 1.4936670526813656e-05,
"loss": 0.972,
"step": 618500
},
{
"epoch": 0.7,
"learning_rate": 1.490842831525567e-05,
"loss": 0.9896,
"step": 619000
},
{
"epoch": 0.7,
"learning_rate": 1.4880072681161311e-05,
"loss": 0.9666,
"step": 619500
},
{
"epoch": 0.7,
"learning_rate": 1.4851717047066951e-05,
"loss": 0.9798,
"step": 620000
},
{
"epoch": 0.7,
"learning_rate": 1.4823361412972588e-05,
"loss": 0.9755,
"step": 620500
},
{
"epoch": 0.7,
"learning_rate": 1.4795005778878229e-05,
"loss": 0.976,
"step": 621000
},
{
"epoch": 0.7,
"learning_rate": 1.4766650144783869e-05,
"loss": 0.9763,
"step": 621500
},
{
"epoch": 0.71,
"learning_rate": 1.4738294510689507e-05,
"loss": 0.9684,
"step": 622000
},
{
"epoch": 0.71,
"learning_rate": 1.4709938876595148e-05,
"loss": 0.9886,
"step": 622500
},
{
"epoch": 0.71,
"learning_rate": 1.4681583242500785e-05,
"loss": 0.971,
"step": 623000
},
{
"epoch": 0.71,
"learning_rate": 1.4653284319674616e-05,
"loss": 0.9841,
"step": 623500
},
{
"epoch": 0.71,
"learning_rate": 1.4624928685580252e-05,
"loss": 0.9822,
"step": 624000
},
{
"epoch": 0.71,
"learning_rate": 1.4596573051485893e-05,
"loss": 0.9764,
"step": 624500
},
{
"epoch": 0.71,
"learning_rate": 1.456827412865972e-05,
"loss": 0.9775,
"step": 625000
},
{
"epoch": 0.71,
"learning_rate": 1.453991849456536e-05,
"loss": 0.9756,
"step": 625500
},
{
"epoch": 0.71,
"learning_rate": 1.4511562860470997e-05,
"loss": 0.9922,
"step": 626000
},
{
"epoch": 0.71,
"learning_rate": 1.4483207226376638e-05,
"loss": 0.9789,
"step": 626500
},
{
"epoch": 0.71,
"learning_rate": 1.4454851592282278e-05,
"loss": 0.9804,
"step": 627000
},
{
"epoch": 0.71,
"learning_rate": 1.4426495958187917e-05,
"loss": 0.9702,
"step": 627500
},
{
"epoch": 0.71,
"learning_rate": 1.4398140324093557e-05,
"loss": 0.9721,
"step": 628000
},
{
"epoch": 0.71,
"learning_rate": 1.4369784689999197e-05,
"loss": 0.9782,
"step": 628500
},
{
"epoch": 0.71,
"learning_rate": 1.4341485767173025e-05,
"loss": 0.9725,
"step": 629000
},
{
"epoch": 0.71,
"learning_rate": 1.4313130133078662e-05,
"loss": 0.9715,
"step": 629500
},
{
"epoch": 0.71,
"learning_rate": 1.4284831210252492e-05,
"loss": 0.9695,
"step": 630000
},
{
"epoch": 0.72,
"learning_rate": 1.425647557615813e-05,
"loss": 0.9712,
"step": 630500
},
{
"epoch": 0.72,
"learning_rate": 1.422811994206377e-05,
"loss": 0.9682,
"step": 631000
},
{
"epoch": 0.72,
"learning_rate": 1.4199764307969407e-05,
"loss": 0.9719,
"step": 631500
},
{
"epoch": 0.72,
"learning_rate": 1.4171408673875047e-05,
"loss": 0.9745,
"step": 632000
},
{
"epoch": 0.72,
"learning_rate": 1.4143053039780687e-05,
"loss": 0.9769,
"step": 632500
},
{
"epoch": 0.72,
"learning_rate": 1.4114697405686326e-05,
"loss": 0.97,
"step": 633000
},
{
"epoch": 0.72,
"learning_rate": 1.4086341771591966e-05,
"loss": 0.9707,
"step": 633500
},
{
"epoch": 0.72,
"learning_rate": 1.4058042848765794e-05,
"loss": 0.9803,
"step": 634000
},
{
"epoch": 0.72,
"learning_rate": 1.4029687214671434e-05,
"loss": 0.9779,
"step": 634500
},
{
"epoch": 0.72,
"learning_rate": 1.400133158057707e-05,
"loss": 0.9755,
"step": 635000
},
{
"epoch": 0.72,
"learning_rate": 1.3973032657750902e-05,
"loss": 0.9712,
"step": 635500
},
{
"epoch": 0.72,
"learning_rate": 1.3944677023656538e-05,
"loss": 0.9684,
"step": 636000
},
{
"epoch": 0.72,
"learning_rate": 1.3916321389562179e-05,
"loss": 0.9793,
"step": 636500
},
{
"epoch": 0.72,
"learning_rate": 1.3888022466736006e-05,
"loss": 0.9718,
"step": 637000
},
{
"epoch": 0.72,
"learning_rate": 1.3859723543909834e-05,
"loss": 0.9717,
"step": 637500
},
{
"epoch": 0.72,
"learning_rate": 1.3831367909815474e-05,
"loss": 0.9764,
"step": 638000
},
{
"epoch": 0.72,
"learning_rate": 1.3803012275721114e-05,
"loss": 0.9641,
"step": 638500
},
{
"epoch": 0.72,
"learning_rate": 1.3774656641626751e-05,
"loss": 0.9771,
"step": 639000
},
{
"epoch": 0.73,
"learning_rate": 1.3746301007532391e-05,
"loss": 0.9691,
"step": 639500
},
{
"epoch": 0.73,
"learning_rate": 1.3717945373438028e-05,
"loss": 0.971,
"step": 640000
},
{
"epoch": 0.73,
"learning_rate": 1.3689589739343669e-05,
"loss": 0.9731,
"step": 640500
},
{
"epoch": 0.73,
"learning_rate": 1.3661234105249309e-05,
"loss": 0.9767,
"step": 641000
},
{
"epoch": 0.73,
"learning_rate": 1.3632878471154948e-05,
"loss": 0.9653,
"step": 641500
},
{
"epoch": 0.73,
"learning_rate": 1.3604522837060588e-05,
"loss": 0.9729,
"step": 642000
},
{
"epoch": 0.73,
"learning_rate": 1.3576167202966228e-05,
"loss": 0.9628,
"step": 642500
},
{
"epoch": 0.73,
"learning_rate": 1.3547811568871865e-05,
"loss": 0.9756,
"step": 643000
},
{
"epoch": 0.73,
"learning_rate": 1.3519512646045693e-05,
"loss": 0.9818,
"step": 643500
},
{
"epoch": 0.73,
"learning_rate": 1.3491157011951333e-05,
"loss": 0.9808,
"step": 644000
},
{
"epoch": 0.73,
"learning_rate": 1.346285808912516e-05,
"loss": 0.9755,
"step": 644500
},
{
"epoch": 0.73,
"learning_rate": 1.34345024550308e-05,
"loss": 0.9725,
"step": 645000
},
{
"epoch": 0.73,
"learning_rate": 1.3406146820936441e-05,
"loss": 0.9754,
"step": 645500
},
{
"epoch": 0.73,
"learning_rate": 1.3377791186842078e-05,
"loss": 0.9796,
"step": 646000
},
{
"epoch": 0.73,
"learning_rate": 1.3349435552747718e-05,
"loss": 0.9779,
"step": 646500
},
{
"epoch": 0.73,
"learning_rate": 1.3321079918653357e-05,
"loss": 0.9741,
"step": 647000
},
{
"epoch": 0.73,
"learning_rate": 1.3292780995827186e-05,
"loss": 0.9711,
"step": 647500
},
{
"epoch": 0.73,
"learning_rate": 1.3264425361732825e-05,
"loss": 0.9764,
"step": 648000
},
{
"epoch": 0.74,
"learning_rate": 1.3236069727638465e-05,
"loss": 0.9716,
"step": 648500
},
{
"epoch": 0.74,
"learning_rate": 1.3207714093544105e-05,
"loss": 0.9703,
"step": 649000
},
{
"epoch": 0.74,
"learning_rate": 1.3179358459449742e-05,
"loss": 0.9741,
"step": 649500
},
{
"epoch": 0.74,
"learning_rate": 1.3151002825355382e-05,
"loss": 0.962,
"step": 650000
},
{
"epoch": 0.74,
"learning_rate": 1.312264719126102e-05,
"loss": 0.9723,
"step": 650500
},
{
"epoch": 0.74,
"learning_rate": 1.309429155716666e-05,
"loss": 0.971,
"step": 651000
},
{
"epoch": 0.74,
"learning_rate": 1.30659359230723e-05,
"loss": 0.9659,
"step": 651500
},
{
"epoch": 0.74,
"learning_rate": 1.3037580288977938e-05,
"loss": 0.973,
"step": 652000
},
{
"epoch": 0.74,
"learning_rate": 1.3009224654883579e-05,
"loss": 0.9649,
"step": 652500
},
{
"epoch": 0.74,
"learning_rate": 1.2980925732057406e-05,
"loss": 0.9739,
"step": 653000
},
{
"epoch": 0.74,
"learning_rate": 1.2952626809231234e-05,
"loss": 0.9721,
"step": 653500
},
{
"epoch": 0.74,
"learning_rate": 1.2924271175136874e-05,
"loss": 0.9722,
"step": 654000
},
{
"epoch": 0.74,
"learning_rate": 1.2895915541042514e-05,
"loss": 0.9699,
"step": 654500
},
{
"epoch": 0.74,
"learning_rate": 1.2867559906948151e-05,
"loss": 0.9611,
"step": 655000
},
{
"epoch": 0.74,
"learning_rate": 1.2839204272853791e-05,
"loss": 0.9761,
"step": 655500
},
{
"epoch": 0.74,
"learning_rate": 1.2810848638759428e-05,
"loss": 0.9618,
"step": 656000
},
{
"epoch": 0.74,
"learning_rate": 1.2782493004665069e-05,
"loss": 0.9714,
"step": 656500
},
{
"epoch": 0.75,
"learning_rate": 1.2754137370570709e-05,
"loss": 0.9764,
"step": 657000
},
{
"epoch": 0.75,
"learning_rate": 1.2725781736476348e-05,
"loss": 0.9708,
"step": 657500
},
{
"epoch": 0.75,
"learning_rate": 1.2697426102381988e-05,
"loss": 0.9673,
"step": 658000
},
{
"epoch": 0.75,
"learning_rate": 1.2669127179555815e-05,
"loss": 0.9642,
"step": 658500
},
{
"epoch": 0.75,
"learning_rate": 1.2640771545461456e-05,
"loss": 0.9664,
"step": 659000
},
{
"epoch": 0.75,
"learning_rate": 1.2612415911367093e-05,
"loss": 0.9681,
"step": 659500
},
{
"epoch": 0.75,
"learning_rate": 1.2584116988540923e-05,
"loss": 0.9707,
"step": 660000
},
{
"epoch": 0.75,
"learning_rate": 1.255576135444656e-05,
"loss": 0.961,
"step": 660500
},
{
"epoch": 0.75,
"learning_rate": 1.25274057203522e-05,
"loss": 0.9649,
"step": 661000
},
{
"epoch": 0.75,
"learning_rate": 1.249905008625784e-05,
"loss": 0.9611,
"step": 661500
},
{
"epoch": 0.75,
"learning_rate": 1.2470694452163478e-05,
"loss": 0.9685,
"step": 662000
},
{
"epoch": 0.75,
"learning_rate": 1.2442338818069118e-05,
"loss": 0.9748,
"step": 662500
},
{
"epoch": 0.75,
"learning_rate": 1.2413983183974757e-05,
"loss": 0.9633,
"step": 663000
},
{
"epoch": 0.75,
"learning_rate": 1.2385627549880397e-05,
"loss": 0.9667,
"step": 663500
},
{
"epoch": 0.75,
"learning_rate": 1.2357271915786036e-05,
"loss": 0.9796,
"step": 664000
},
{
"epoch": 0.75,
"learning_rate": 1.2328972992959865e-05,
"loss": 0.9569,
"step": 664500
},
{
"epoch": 0.75,
"learning_rate": 1.2300617358865503e-05,
"loss": 0.9757,
"step": 665000
},
{
"epoch": 0.75,
"learning_rate": 1.2272261724771142e-05,
"loss": 0.97,
"step": 665500
},
{
"epoch": 0.76,
"learning_rate": 1.2243906090676782e-05,
"loss": 0.9733,
"step": 666000
},
{
"epoch": 0.76,
"learning_rate": 1.2215550456582421e-05,
"loss": 0.9563,
"step": 666500
},
{
"epoch": 0.76,
"learning_rate": 1.218719482248806e-05,
"loss": 0.9745,
"step": 667000
},
{
"epoch": 0.76,
"learning_rate": 1.2158839188393698e-05,
"loss": 0.9661,
"step": 667500
},
{
"epoch": 0.76,
"learning_rate": 1.2130540265567527e-05,
"loss": 0.9667,
"step": 668000
},
{
"epoch": 0.76,
"learning_rate": 1.2102184631473166e-05,
"loss": 0.9658,
"step": 668500
},
{
"epoch": 0.76,
"learning_rate": 1.2073885708646993e-05,
"loss": 0.9702,
"step": 669000
},
{
"epoch": 0.76,
"learning_rate": 1.2045530074552634e-05,
"loss": 0.9644,
"step": 669500
},
{
"epoch": 0.76,
"learning_rate": 1.2017174440458272e-05,
"loss": 0.9734,
"step": 670000
},
{
"epoch": 0.76,
"learning_rate": 1.1988818806363913e-05,
"loss": 0.9704,
"step": 670500
},
{
"epoch": 0.76,
"learning_rate": 1.1960463172269551e-05,
"loss": 0.9614,
"step": 671000
},
{
"epoch": 0.76,
"learning_rate": 1.1932107538175192e-05,
"loss": 0.9628,
"step": 671500
},
{
"epoch": 0.76,
"learning_rate": 1.190375190408083e-05,
"loss": 0.9607,
"step": 672000
},
{
"epoch": 0.76,
"learning_rate": 1.1875396269986469e-05,
"loss": 0.9652,
"step": 672500
},
{
"epoch": 0.76,
"learning_rate": 1.1847097347160298e-05,
"loss": 0.962,
"step": 673000
},
{
"epoch": 0.76,
"learning_rate": 1.1818741713065936e-05,
"loss": 0.9572,
"step": 673500
},
{
"epoch": 0.76,
"learning_rate": 1.1790386078971575e-05,
"loss": 0.9683,
"step": 674000
},
{
"epoch": 0.77,
"learning_rate": 1.1762030444877214e-05,
"loss": 0.9622,
"step": 674500
},
{
"epoch": 0.77,
"learning_rate": 1.1733674810782854e-05,
"loss": 0.9601,
"step": 675000
},
{
"epoch": 0.77,
"learning_rate": 1.1705375887956681e-05,
"loss": 0.9681,
"step": 675500
},
{
"epoch": 0.77,
"learning_rate": 1.1677020253862322e-05,
"loss": 0.9731,
"step": 676000
},
{
"epoch": 0.77,
"learning_rate": 1.1648664619767962e-05,
"loss": 0.9673,
"step": 676500
},
{
"epoch": 0.77,
"learning_rate": 1.16203089856736e-05,
"loss": 0.9699,
"step": 677000
},
{
"epoch": 0.77,
"learning_rate": 1.159195335157924e-05,
"loss": 0.969,
"step": 677500
},
{
"epoch": 0.77,
"learning_rate": 1.1563597717484878e-05,
"loss": 0.9682,
"step": 678000
},
{
"epoch": 0.77,
"learning_rate": 1.1535242083390518e-05,
"loss": 0.9603,
"step": 678500
},
{
"epoch": 0.77,
"learning_rate": 1.1506886449296157e-05,
"loss": 0.9669,
"step": 679000
},
{
"epoch": 0.77,
"learning_rate": 1.1478587526469984e-05,
"loss": 0.9622,
"step": 679500
},
{
"epoch": 0.77,
"learning_rate": 1.1450288603643813e-05,
"loss": 0.9674,
"step": 680000
},
{
"epoch": 0.77,
"learning_rate": 1.142198968081764e-05,
"loss": 0.9608,
"step": 680500
},
{
"epoch": 0.77,
"learning_rate": 1.139363404672328e-05,
"loss": 0.9697,
"step": 681000
},
{
"epoch": 0.77,
"learning_rate": 1.136527841262892e-05,
"loss": 0.9684,
"step": 681500
},
{
"epoch": 0.77,
"learning_rate": 1.1336922778534558e-05,
"loss": 0.9669,
"step": 682000
},
{
"epoch": 0.77,
"learning_rate": 1.1308567144440197e-05,
"loss": 0.9762,
"step": 682500
},
{
"epoch": 0.77,
"learning_rate": 1.1280211510345837e-05,
"loss": 0.9691,
"step": 683000
},
{
"epoch": 0.78,
"learning_rate": 1.1251855876251478e-05,
"loss": 0.9578,
"step": 683500
},
{
"epoch": 0.78,
"learning_rate": 1.1223500242157116e-05,
"loss": 0.9582,
"step": 684000
},
{
"epoch": 0.78,
"learning_rate": 1.1195144608062755e-05,
"loss": 0.9628,
"step": 684500
},
{
"epoch": 0.78,
"learning_rate": 1.1166788973968393e-05,
"loss": 0.9667,
"step": 685000
},
{
"epoch": 0.78,
"learning_rate": 1.1138490051142223e-05,
"loss": 0.9689,
"step": 685500
},
{
"epoch": 0.78,
"learning_rate": 1.1110134417047861e-05,
"loss": 0.9671,
"step": 686000
},
{
"epoch": 0.78,
"learning_rate": 1.108183549422169e-05,
"loss": 0.9569,
"step": 686500
},
{
"epoch": 0.78,
"learning_rate": 1.1053479860127329e-05,
"loss": 0.9745,
"step": 687000
},
{
"epoch": 0.78,
"learning_rate": 1.1025180937301156e-05,
"loss": 0.9617,
"step": 687500
},
{
"epoch": 0.78,
"learning_rate": 1.0996825303206795e-05,
"loss": 0.963,
"step": 688000
},
{
"epoch": 0.78,
"learning_rate": 1.0968469669112435e-05,
"loss": 0.9718,
"step": 688500
},
{
"epoch": 0.78,
"learning_rate": 1.0940114035018074e-05,
"loss": 0.9576,
"step": 689000
},
{
"epoch": 0.78,
"learning_rate": 1.0911758400923714e-05,
"loss": 0.9648,
"step": 689500
},
{
"epoch": 0.78,
"learning_rate": 1.0883402766829353e-05,
"loss": 0.9619,
"step": 690000
},
{
"epoch": 0.78,
"learning_rate": 1.0855047132734993e-05,
"loss": 0.9674,
"step": 690500
},
{
"epoch": 0.78,
"learning_rate": 1.0826691498640632e-05,
"loss": 0.9683,
"step": 691000
},
{
"epoch": 0.78,
"learning_rate": 1.079833586454627e-05,
"loss": 0.9679,
"step": 691500
},
{
"epoch": 0.78,
"learning_rate": 1.0769980230451909e-05,
"loss": 0.9587,
"step": 692000
},
{
"epoch": 0.79,
"learning_rate": 1.074162459635755e-05,
"loss": 0.9625,
"step": 692500
},
{
"epoch": 0.79,
"learning_rate": 1.0713325673531377e-05,
"loss": 0.9681,
"step": 693000
},
{
"epoch": 0.79,
"learning_rate": 1.0684970039437015e-05,
"loss": 0.9708,
"step": 693500
},
{
"epoch": 0.79,
"learning_rate": 1.0656614405342656e-05,
"loss": 0.9635,
"step": 694000
},
{
"epoch": 0.79,
"learning_rate": 1.0628315482516483e-05,
"loss": 0.9583,
"step": 694500
},
{
"epoch": 0.79,
"learning_rate": 1.0599959848422122e-05,
"loss": 0.9659,
"step": 695000
},
{
"epoch": 0.79,
"learning_rate": 1.0571604214327762e-05,
"loss": 0.9611,
"step": 695500
},
{
"epoch": 0.79,
"learning_rate": 1.0543248580233402e-05,
"loss": 0.965,
"step": 696000
},
{
"epoch": 0.79,
"learning_rate": 1.051489294613904e-05,
"loss": 0.958,
"step": 696500
},
{
"epoch": 0.79,
"learning_rate": 1.048653731204468e-05,
"loss": 0.9603,
"step": 697000
},
{
"epoch": 0.79,
"learning_rate": 1.045818167795032e-05,
"loss": 0.9658,
"step": 697500
},
{
"epoch": 0.79,
"learning_rate": 1.0429882755124147e-05,
"loss": 0.9618,
"step": 698000
},
{
"epoch": 0.79,
"learning_rate": 1.0401527121029786e-05,
"loss": 0.9724,
"step": 698500
},
{
"epoch": 0.79,
"learning_rate": 1.0373171486935426e-05,
"loss": 0.959,
"step": 699000
},
{
"epoch": 0.79,
"learning_rate": 1.0344872564109254e-05,
"loss": 0.964,
"step": 699500
},
{
"epoch": 0.79,
"learning_rate": 1.0316516930014892e-05,
"loss": 0.9665,
"step": 700000
},
{
"epoch": 0.79,
"learning_rate": 1.028816129592053e-05,
"loss": 0.9659,
"step": 700500
},
{
"epoch": 0.8,
"learning_rate": 1.0259805661826171e-05,
"loss": 0.9496,
"step": 701000
},
{
"epoch": 0.8,
"learning_rate": 1.0231450027731811e-05,
"loss": 0.9658,
"step": 701500
},
{
"epoch": 0.8,
"learning_rate": 1.020309439363745e-05,
"loss": 0.9705,
"step": 702000
},
{
"epoch": 0.8,
"learning_rate": 1.0174738759543089e-05,
"loss": 0.9643,
"step": 702500
},
{
"epoch": 0.8,
"learning_rate": 1.0146383125448729e-05,
"loss": 0.9706,
"step": 703000
},
{
"epoch": 0.8,
"learning_rate": 1.0118027491354367e-05,
"loss": 0.9624,
"step": 703500
},
{
"epoch": 0.8,
"learning_rate": 1.0089671857260006e-05,
"loss": 0.9669,
"step": 704000
},
{
"epoch": 0.8,
"learning_rate": 1.0061316223165645e-05,
"loss": 0.968,
"step": 704500
},
{
"epoch": 0.8,
"learning_rate": 1.0032960589071285e-05,
"loss": 0.9671,
"step": 705000
},
{
"epoch": 0.8,
"learning_rate": 1.0004604954976925e-05,
"loss": 0.9484,
"step": 705500
},
{
"epoch": 0.8,
"learning_rate": 9.976249320882564e-06,
"loss": 0.9545,
"step": 706000
},
{
"epoch": 0.8,
"learning_rate": 9.947893686788203e-06,
"loss": 0.9583,
"step": 706500
},
{
"epoch": 0.8,
"learning_rate": 9.919538052693843e-06,
"loss": 0.9651,
"step": 707000
},
{
"epoch": 0.8,
"learning_rate": 9.891182418599481e-06,
"loss": 0.9668,
"step": 707500
},
{
"epoch": 0.8,
"learning_rate": 9.86282678450512e-06,
"loss": 0.9645,
"step": 708000
},
{
"epoch": 0.8,
"learning_rate": 9.834471150410759e-06,
"loss": 0.9703,
"step": 708500
},
{
"epoch": 0.8,
"learning_rate": 9.806115516316399e-06,
"loss": 0.9585,
"step": 709000
},
{
"epoch": 0.8,
"learning_rate": 9.777873304758415e-06,
"loss": 0.9683,
"step": 709500
},
{
"epoch": 0.81,
"learning_rate": 9.749517670664056e-06,
"loss": 0.9618,
"step": 710000
},
{
"epoch": 0.81,
"learning_rate": 9.721162036569694e-06,
"loss": 0.9621,
"step": 710500
},
{
"epoch": 0.81,
"learning_rate": 9.692806402475334e-06,
"loss": 0.9609,
"step": 711000
},
{
"epoch": 0.81,
"learning_rate": 9.664450768380973e-06,
"loss": 0.9654,
"step": 711500
},
{
"epoch": 0.81,
"learning_rate": 9.636095134286613e-06,
"loss": 0.9624,
"step": 712000
},
{
"epoch": 0.81,
"learning_rate": 9.607739500192252e-06,
"loss": 0.9577,
"step": 712500
},
{
"epoch": 0.81,
"learning_rate": 9.57938386609789e-06,
"loss": 0.9502,
"step": 713000
},
{
"epoch": 0.81,
"learning_rate": 9.55102823200353e-06,
"loss": 0.9596,
"step": 713500
},
{
"epoch": 0.81,
"learning_rate": 9.52267259790917e-06,
"loss": 0.9628,
"step": 714000
},
{
"epoch": 0.81,
"learning_rate": 9.494316963814808e-06,
"loss": 0.9621,
"step": 714500
},
{
"epoch": 0.81,
"learning_rate": 9.465961329720448e-06,
"loss": 0.9479,
"step": 715000
},
{
"epoch": 0.81,
"learning_rate": 9.437605695626087e-06,
"loss": 0.9521,
"step": 715500
},
{
"epoch": 0.81,
"learning_rate": 9.409306772799916e-06,
"loss": 0.9632,
"step": 716000
},
{
"epoch": 0.81,
"learning_rate": 9.380951138705555e-06,
"loss": 0.9517,
"step": 716500
},
{
"epoch": 0.81,
"learning_rate": 9.352595504611193e-06,
"loss": 0.9538,
"step": 717000
},
{
"epoch": 0.81,
"learning_rate": 9.324239870516834e-06,
"loss": 0.9554,
"step": 717500
},
{
"epoch": 0.81,
"learning_rate": 9.295884236422472e-06,
"loss": 0.9552,
"step": 718000
},
{
"epoch": 0.81,
"learning_rate": 9.2675853135963e-06,
"loss": 0.952,
"step": 718500
},
{
"epoch": 0.82,
"learning_rate": 9.23922967950194e-06,
"loss": 0.9689,
"step": 719000
},
{
"epoch": 0.82,
"learning_rate": 9.210930756675767e-06,
"loss": 0.9554,
"step": 719500
},
{
"epoch": 0.82,
"learning_rate": 9.182575122581406e-06,
"loss": 0.9591,
"step": 720000
},
{
"epoch": 0.82,
"learning_rate": 9.154219488487045e-06,
"loss": 0.9599,
"step": 720500
},
{
"epoch": 0.82,
"learning_rate": 9.125863854392685e-06,
"loss": 0.951,
"step": 721000
},
{
"epoch": 0.82,
"learning_rate": 9.097508220298324e-06,
"loss": 0.9536,
"step": 721500
},
{
"epoch": 0.82,
"learning_rate": 9.069152586203964e-06,
"loss": 0.951,
"step": 722000
},
{
"epoch": 0.82,
"learning_rate": 9.040796952109603e-06,
"loss": 0.9584,
"step": 722500
},
{
"epoch": 0.82,
"learning_rate": 9.012498029283432e-06,
"loss": 0.9532,
"step": 723000
},
{
"epoch": 0.82,
"learning_rate": 8.98414239518907e-06,
"loss": 0.966,
"step": 723500
},
{
"epoch": 0.82,
"learning_rate": 8.955786761094709e-06,
"loss": 0.9611,
"step": 724000
},
{
"epoch": 0.82,
"learning_rate": 8.92743112700035e-06,
"loss": 0.9633,
"step": 724500
},
{
"epoch": 0.82,
"learning_rate": 8.899075492905988e-06,
"loss": 0.9666,
"step": 725000
},
{
"epoch": 0.82,
"learning_rate": 8.870776570079815e-06,
"loss": 0.9713,
"step": 725500
},
{
"epoch": 0.82,
"learning_rate": 8.842420935985456e-06,
"loss": 0.9557,
"step": 726000
},
{
"epoch": 0.82,
"learning_rate": 8.814065301891094e-06,
"loss": 0.9483,
"step": 726500
},
{
"epoch": 0.82,
"learning_rate": 8.785709667796733e-06,
"loss": 0.9602,
"step": 727000
},
{
"epoch": 0.83,
"learning_rate": 8.757354033702373e-06,
"loss": 0.9436,
"step": 727500
},
{
"epoch": 0.83,
"learning_rate": 8.7290551108762e-06,
"loss": 0.9613,
"step": 728000
},
{
"epoch": 0.83,
"learning_rate": 8.700756188050028e-06,
"loss": 0.9605,
"step": 728500
},
{
"epoch": 0.83,
"learning_rate": 8.672400553955667e-06,
"loss": 0.964,
"step": 729000
},
{
"epoch": 0.83,
"learning_rate": 8.644044919861307e-06,
"loss": 0.9644,
"step": 729500
},
{
"epoch": 0.83,
"learning_rate": 8.615689285766947e-06,
"loss": 0.9516,
"step": 730000
},
{
"epoch": 0.83,
"learning_rate": 8.587333651672586e-06,
"loss": 0.9565,
"step": 730500
},
{
"epoch": 0.83,
"learning_rate": 8.558978017578224e-06,
"loss": 0.9563,
"step": 731000
},
{
"epoch": 0.83,
"learning_rate": 8.530679094752054e-06,
"loss": 0.958,
"step": 731500
},
{
"epoch": 0.83,
"learning_rate": 8.502323460657692e-06,
"loss": 0.9594,
"step": 732000
},
{
"epoch": 0.83,
"learning_rate": 8.47396782656333e-06,
"loss": 0.9577,
"step": 732500
},
{
"epoch": 0.83,
"learning_rate": 8.445612192468971e-06,
"loss": 0.9671,
"step": 733000
},
{
"epoch": 0.83,
"learning_rate": 8.41725655837461e-06,
"loss": 0.9539,
"step": 733500
},
{
"epoch": 0.83,
"learning_rate": 8.388900924280248e-06,
"loss": 0.9599,
"step": 734000
},
{
"epoch": 0.83,
"learning_rate": 8.360602001454077e-06,
"loss": 0.953,
"step": 734500
},
{
"epoch": 0.83,
"learning_rate": 8.332246367359716e-06,
"loss": 0.9513,
"step": 735000
},
{
"epoch": 0.83,
"learning_rate": 8.303890733265356e-06,
"loss": 0.9582,
"step": 735500
},
{
"epoch": 0.83,
"learning_rate": 8.275535099170995e-06,
"loss": 0.958,
"step": 736000
},
{
"epoch": 0.84,
"learning_rate": 8.247179465076635e-06,
"loss": 0.9549,
"step": 736500
},
{
"epoch": 0.84,
"learning_rate": 8.218823830982274e-06,
"loss": 0.9657,
"step": 737000
},
{
"epoch": 0.84,
"learning_rate": 8.190468196887912e-06,
"loss": 0.9493,
"step": 737500
},
{
"epoch": 0.84,
"learning_rate": 8.162112562793551e-06,
"loss": 0.9625,
"step": 738000
},
{
"epoch": 0.84,
"learning_rate": 8.133756928699191e-06,
"loss": 0.9549,
"step": 738500
},
{
"epoch": 0.84,
"learning_rate": 8.10540129460483e-06,
"loss": 0.9567,
"step": 739000
},
{
"epoch": 0.84,
"learning_rate": 8.07704566051047e-06,
"loss": 0.9513,
"step": 739500
},
{
"epoch": 0.84,
"learning_rate": 8.048746737684298e-06,
"loss": 0.9591,
"step": 740000
},
{
"epoch": 0.84,
"learning_rate": 8.020447814858125e-06,
"loss": 0.9548,
"step": 740500
},
{
"epoch": 0.84,
"learning_rate": 7.992148892031953e-06,
"loss": 0.9655,
"step": 741000
},
{
"epoch": 0.84,
"learning_rate": 7.963793257937593e-06,
"loss": 0.959,
"step": 741500
},
{
"epoch": 0.84,
"learning_rate": 7.935437623843232e-06,
"loss": 0.9559,
"step": 742000
},
{
"epoch": 0.84,
"learning_rate": 7.907081989748872e-06,
"loss": 0.9588,
"step": 742500
},
{
"epoch": 0.84,
"learning_rate": 7.87872635565451e-06,
"loss": 0.9559,
"step": 743000
},
{
"epoch": 0.84,
"learning_rate": 7.85037072156015e-06,
"loss": 0.9518,
"step": 743500
},
{
"epoch": 0.84,
"learning_rate": 7.82201508746579e-06,
"loss": 0.9653,
"step": 744000
},
{
"epoch": 0.84,
"learning_rate": 7.793659453371428e-06,
"loss": 0.9477,
"step": 744500
},
{
"epoch": 0.84,
"learning_rate": 7.765303819277067e-06,
"loss": 0.9602,
"step": 745000
},
{
"epoch": 0.85,
"learning_rate": 7.736948185182707e-06,
"loss": 0.959,
"step": 745500
},
{
"epoch": 0.85,
"learning_rate": 7.708592551088347e-06,
"loss": 0.9573,
"step": 746000
},
{
"epoch": 0.85,
"learning_rate": 7.680236916993986e-06,
"loss": 0.9506,
"step": 746500
},
{
"epoch": 0.85,
"learning_rate": 7.651881282899624e-06,
"loss": 0.96,
"step": 747000
},
{
"epoch": 0.85,
"learning_rate": 7.623525648805265e-06,
"loss": 0.9595,
"step": 747500
},
{
"epoch": 0.85,
"learning_rate": 7.595170014710903e-06,
"loss": 0.9584,
"step": 748000
},
{
"epoch": 0.85,
"learning_rate": 7.566814380616542e-06,
"loss": 0.9563,
"step": 748500
},
{
"epoch": 0.85,
"learning_rate": 7.538458746522181e-06,
"loss": 0.9485,
"step": 749000
},
{
"epoch": 0.85,
"learning_rate": 7.51015982369601e-06,
"loss": 0.9565,
"step": 749500
},
{
"epoch": 0.85,
"learning_rate": 7.481804189601649e-06,
"loss": 0.9652,
"step": 750000
},
{
"epoch": 0.85,
"learning_rate": 7.453448555507288e-06,
"loss": 0.9545,
"step": 750500
},
{
"epoch": 0.85,
"learning_rate": 7.425092921412928e-06,
"loss": 0.9546,
"step": 751000
},
{
"epoch": 0.85,
"learning_rate": 7.396737287318567e-06,
"loss": 0.9476,
"step": 751500
},
{
"epoch": 0.85,
"learning_rate": 7.368438364492394e-06,
"loss": 0.9592,
"step": 752000
},
{
"epoch": 0.85,
"learning_rate": 7.340139441666223e-06,
"loss": 0.9634,
"step": 752500
},
{
"epoch": 0.85,
"learning_rate": 7.311783807571862e-06,
"loss": 0.9472,
"step": 753000
},
{
"epoch": 0.85,
"learning_rate": 7.283428173477501e-06,
"loss": 0.9532,
"step": 753500
},
{
"epoch": 0.86,
"learning_rate": 7.255072539383142e-06,
"loss": 0.9553,
"step": 754000
},
{
"epoch": 0.86,
"learning_rate": 7.22671690528878e-06,
"loss": 0.949,
"step": 754500
},
{
"epoch": 0.86,
"learning_rate": 7.198417982462608e-06,
"loss": 0.9444,
"step": 755000
},
{
"epoch": 0.86,
"learning_rate": 7.170062348368246e-06,
"loss": 0.9637,
"step": 755500
},
{
"epoch": 0.86,
"learning_rate": 7.141706714273887e-06,
"loss": 0.9631,
"step": 756000
},
{
"epoch": 0.86,
"learning_rate": 7.113351080179525e-06,
"loss": 0.9647,
"step": 756500
},
{
"epoch": 0.86,
"learning_rate": 7.084995446085165e-06,
"loss": 0.9642,
"step": 757000
},
{
"epoch": 0.86,
"learning_rate": 7.056639811990803e-06,
"loss": 0.9535,
"step": 757500
},
{
"epoch": 0.86,
"learning_rate": 7.0282841778964436e-06,
"loss": 0.9574,
"step": 758000
},
{
"epoch": 0.86,
"learning_rate": 6.999928543802082e-06,
"loss": 0.9631,
"step": 758500
},
{
"epoch": 0.86,
"learning_rate": 6.97162962097591e-06,
"loss": 0.9613,
"step": 759000
},
{
"epoch": 0.86,
"learning_rate": 6.94327398688155e-06,
"loss": 0.9502,
"step": 759500
},
{
"epoch": 0.86,
"learning_rate": 6.914918352787189e-06,
"loss": 0.9564,
"step": 760000
},
{
"epoch": 0.86,
"learning_rate": 6.886562718692828e-06,
"loss": 0.9522,
"step": 760500
},
{
"epoch": 0.86,
"learning_rate": 6.858263795866657e-06,
"loss": 0.9613,
"step": 761000
},
{
"epoch": 0.86,
"learning_rate": 6.829908161772296e-06,
"loss": 0.9506,
"step": 761500
},
{
"epoch": 0.86,
"learning_rate": 6.801609238946123e-06,
"loss": 0.9601,
"step": 762000
},
{
"epoch": 0.86,
"learning_rate": 6.7732536048517635e-06,
"loss": 0.9619,
"step": 762500
},
{
"epoch": 0.87,
"learning_rate": 6.744897970757402e-06,
"loss": 0.951,
"step": 763000
},
{
"epoch": 0.87,
"learning_rate": 6.7165423366630415e-06,
"loss": 0.954,
"step": 763500
},
{
"epoch": 0.87,
"learning_rate": 6.68818670256868e-06,
"loss": 0.9555,
"step": 764000
},
{
"epoch": 0.87,
"learning_rate": 6.6598877797425084e-06,
"loss": 0.9579,
"step": 764500
},
{
"epoch": 0.87,
"learning_rate": 6.631532145648148e-06,
"loss": 0.9552,
"step": 765000
},
{
"epoch": 0.87,
"learning_rate": 6.6031765115537865e-06,
"loss": 0.9435,
"step": 765500
},
{
"epoch": 0.87,
"learning_rate": 6.574820877459427e-06,
"loss": 0.9568,
"step": 766000
},
{
"epoch": 0.87,
"learning_rate": 6.546465243365065e-06,
"loss": 0.9482,
"step": 766500
},
{
"epoch": 0.87,
"learning_rate": 6.518109609270705e-06,
"loss": 0.9496,
"step": 767000
},
{
"epoch": 0.87,
"learning_rate": 6.4897539751763435e-06,
"loss": 0.9513,
"step": 767500
},
{
"epoch": 0.87,
"learning_rate": 6.461398341081984e-06,
"loss": 0.9501,
"step": 768000
},
{
"epoch": 0.87,
"learning_rate": 6.433042706987623e-06,
"loss": 0.9417,
"step": 768500
},
{
"epoch": 0.87,
"learning_rate": 6.404687072893262e-06,
"loss": 0.9488,
"step": 769000
},
{
"epoch": 0.87,
"learning_rate": 6.3763314387989005e-06,
"loss": 0.9518,
"step": 769500
},
{
"epoch": 0.87,
"learning_rate": 6.34803251597273e-06,
"loss": 0.9483,
"step": 770000
},
{
"epoch": 0.87,
"learning_rate": 6.319676881878368e-06,
"loss": 0.9482,
"step": 770500
},
{
"epoch": 0.87,
"learning_rate": 6.291321247784007e-06,
"loss": 0.9463,
"step": 771000
},
{
"epoch": 0.88,
"learning_rate": 6.262965613689646e-06,
"loss": 0.9576,
"step": 771500
},
{
"epoch": 0.88,
"learning_rate": 6.234609979595286e-06,
"loss": 0.9539,
"step": 772000
},
{
"epoch": 0.88,
"learning_rate": 6.206311056769114e-06,
"loss": 0.9542,
"step": 772500
},
{
"epoch": 0.88,
"learning_rate": 6.1779554226747535e-06,
"loss": 0.9523,
"step": 773000
},
{
"epoch": 0.88,
"learning_rate": 6.149599788580392e-06,
"loss": 0.9535,
"step": 773500
},
{
"epoch": 0.88,
"learning_rate": 6.1212441544860315e-06,
"loss": 0.9559,
"step": 774000
},
{
"epoch": 0.88,
"learning_rate": 6.092888520391671e-06,
"loss": 0.9533,
"step": 774500
},
{
"epoch": 0.88,
"learning_rate": 6.0645328862973105e-06,
"loss": 0.9501,
"step": 775000
},
{
"epoch": 0.88,
"learning_rate": 6.036177252202949e-06,
"loss": 0.9479,
"step": 775500
},
{
"epoch": 0.88,
"learning_rate": 6.0078216181085885e-06,
"loss": 0.9554,
"step": 776000
},
{
"epoch": 0.88,
"learning_rate": 5.979522695282417e-06,
"loss": 0.9515,
"step": 776500
},
{
"epoch": 0.88,
"learning_rate": 5.951223772456245e-06,
"loss": 0.9556,
"step": 777000
},
{
"epoch": 0.88,
"learning_rate": 5.922868138361884e-06,
"loss": 0.9558,
"step": 777500
},
{
"epoch": 0.88,
"learning_rate": 5.894512504267523e-06,
"loss": 0.943,
"step": 778000
},
{
"epoch": 0.88,
"learning_rate": 5.866156870173163e-06,
"loss": 0.9576,
"step": 778500
},
{
"epoch": 0.88,
"learning_rate": 5.837801236078802e-06,
"loss": 0.9565,
"step": 779000
},
{
"epoch": 0.88,
"learning_rate": 5.8095023132526295e-06,
"loss": 0.953,
"step": 779500
},
{
"epoch": 0.88,
"learning_rate": 5.781146679158269e-06,
"loss": 0.9467,
"step": 780000
},
{
"epoch": 0.89,
"learning_rate": 5.752791045063908e-06,
"loss": 0.9592,
"step": 780500
},
{
"epoch": 0.89,
"learning_rate": 5.724492122237736e-06,
"loss": 0.9514,
"step": 781000
},
{
"epoch": 0.89,
"learning_rate": 5.696136488143375e-06,
"loss": 0.943,
"step": 781500
},
{
"epoch": 0.89,
"learning_rate": 5.667780854049015e-06,
"loss": 0.9479,
"step": 782000
},
{
"epoch": 0.89,
"learning_rate": 5.639425219954654e-06,
"loss": 0.9525,
"step": 782500
},
{
"epoch": 0.89,
"learning_rate": 5.611069585860293e-06,
"loss": 0.9518,
"step": 783000
},
{
"epoch": 0.89,
"learning_rate": 5.582713951765932e-06,
"loss": 0.949,
"step": 783500
},
{
"epoch": 0.89,
"learning_rate": 5.554358317671572e-06,
"loss": 0.9504,
"step": 784000
},
{
"epoch": 0.89,
"learning_rate": 5.526002683577211e-06,
"loss": 0.9545,
"step": 784500
},
{
"epoch": 0.89,
"learning_rate": 5.4977037607510395e-06,
"loss": 0.9445,
"step": 785000
},
{
"epoch": 0.89,
"learning_rate": 5.469348126656678e-06,
"loss": 0.9439,
"step": 785500
},
{
"epoch": 0.89,
"learning_rate": 5.440992492562318e-06,
"loss": 0.9426,
"step": 786000
},
{
"epoch": 0.89,
"learning_rate": 5.412636858467956e-06,
"loss": 0.9525,
"step": 786500
},
{
"epoch": 0.89,
"learning_rate": 5.3842812243735965e-06,
"loss": 0.9472,
"step": 787000
},
{
"epoch": 0.89,
"learning_rate": 5.355925590279235e-06,
"loss": 0.954,
"step": 787500
},
{
"epoch": 0.89,
"learning_rate": 5.3275699561848746e-06,
"loss": 0.9461,
"step": 788000
},
{
"epoch": 0.89,
"learning_rate": 5.299214322090513e-06,
"loss": 0.9437,
"step": 788500
},
{
"epoch": 0.89,
"learning_rate": 5.2709153992643415e-06,
"loss": 0.9481,
"step": 789000
},
{
"epoch": 0.9,
"learning_rate": 5.242559765169981e-06,
"loss": 0.9347,
"step": 789500
},
{
"epoch": 0.9,
"learning_rate": 5.21420413107562e-06,
"loss": 0.9457,
"step": 790000
},
{
"epoch": 0.9,
"learning_rate": 5.18584849698126e-06,
"loss": 0.9579,
"step": 790500
},
{
"epoch": 0.9,
"learning_rate": 5.157549574155087e-06,
"loss": 0.9443,
"step": 791000
},
{
"epoch": 0.9,
"learning_rate": 5.129193940060727e-06,
"loss": 0.9477,
"step": 791500
},
{
"epoch": 0.9,
"learning_rate": 5.100838305966365e-06,
"loss": 0.9481,
"step": 792000
},
{
"epoch": 0.9,
"learning_rate": 5.072539383140194e-06,
"loss": 0.9479,
"step": 792500
},
{
"epoch": 0.9,
"learning_rate": 5.044183749045833e-06,
"loss": 0.9608,
"step": 793000
},
{
"epoch": 0.9,
"learning_rate": 5.015884826219661e-06,
"loss": 0.9541,
"step": 793500
},
{
"epoch": 0.9,
"learning_rate": 4.9875291921253e-06,
"loss": 0.9438,
"step": 794000
},
{
"epoch": 0.9,
"learning_rate": 4.9591735580309395e-06,
"loss": 0.9448,
"step": 794500
},
{
"epoch": 0.9,
"learning_rate": 4.930817923936579e-06,
"loss": 0.9434,
"step": 795000
},
{
"epoch": 0.9,
"learning_rate": 4.902462289842218e-06,
"loss": 0.9566,
"step": 795500
},
{
"epoch": 0.9,
"learning_rate": 4.874163367016047e-06,
"loss": 0.9526,
"step": 796000
},
{
"epoch": 0.9,
"learning_rate": 4.845807732921685e-06,
"loss": 0.9459,
"step": 796500
},
{
"epoch": 0.9,
"learning_rate": 4.817452098827325e-06,
"loss": 0.947,
"step": 797000
},
{
"epoch": 0.9,
"learning_rate": 4.789096464732964e-06,
"loss": 0.9571,
"step": 797500
},
{
"epoch": 0.91,
"learning_rate": 4.760740830638604e-06,
"loss": 0.9537,
"step": 798000
},
{
"epoch": 0.91,
"learning_rate": 4.732385196544242e-06,
"loss": 0.9508,
"step": 798500
},
{
"epoch": 0.91,
"learning_rate": 4.704029562449882e-06,
"loss": 0.9405,
"step": 799000
},
{
"epoch": 0.91,
"learning_rate": 4.675673928355521e-06,
"loss": 0.9498,
"step": 799500
},
{
"epoch": 0.91,
"learning_rate": 4.647318294261161e-06,
"loss": 0.9484,
"step": 800000
},
{
"epoch": 0.91,
"learning_rate": 4.618962660166799e-06,
"loss": 0.9463,
"step": 800500
},
{
"epoch": 0.91,
"learning_rate": 4.5906637373406275e-06,
"loss": 0.9462,
"step": 801000
},
{
"epoch": 0.91,
"learning_rate": 4.562308103246267e-06,
"loss": 0.9494,
"step": 801500
},
{
"epoch": 0.91,
"learning_rate": 4.533952469151906e-06,
"loss": 0.9441,
"step": 802000
},
{
"epoch": 0.91,
"learning_rate": 4.505596835057545e-06,
"loss": 0.9441,
"step": 802500
},
{
"epoch": 0.91,
"learning_rate": 4.4772412009631845e-06,
"loss": 0.9421,
"step": 803000
},
{
"epoch": 0.91,
"learning_rate": 4.448885566868824e-06,
"loss": 0.9421,
"step": 803500
},
{
"epoch": 0.91,
"learning_rate": 4.420586644042651e-06,
"loss": 0.9506,
"step": 804000
},
{
"epoch": 0.91,
"learning_rate": 4.392231009948291e-06,
"loss": 0.9457,
"step": 804500
},
{
"epoch": 0.91,
"learning_rate": 4.363932087122119e-06,
"loss": 0.9373,
"step": 805000
},
{
"epoch": 0.91,
"learning_rate": 4.335576453027758e-06,
"loss": 0.955,
"step": 805500
},
{
"epoch": 0.91,
"learning_rate": 4.307220818933397e-06,
"loss": 0.9482,
"step": 806000
},
{
"epoch": 0.91,
"learning_rate": 4.278865184839037e-06,
"loss": 0.943,
"step": 806500
},
{
"epoch": 0.92,
"learning_rate": 4.250509550744676e-06,
"loss": 0.9444,
"step": 807000
},
{
"epoch": 0.92,
"learning_rate": 4.222153916650315e-06,
"loss": 0.9517,
"step": 807500
},
{
"epoch": 0.92,
"learning_rate": 4.193798282555954e-06,
"loss": 0.9513,
"step": 808000
},
{
"epoch": 0.92,
"learning_rate": 4.165442648461594e-06,
"loss": 0.9419,
"step": 808500
},
{
"epoch": 0.92,
"learning_rate": 4.137087014367233e-06,
"loss": 0.9427,
"step": 809000
},
{
"epoch": 0.92,
"learning_rate": 4.108731380272872e-06,
"loss": 0.9536,
"step": 809500
},
{
"epoch": 0.92,
"learning_rate": 4.080375746178511e-06,
"loss": 0.9402,
"step": 810000
},
{
"epoch": 0.92,
"learning_rate": 4.0520768233523395e-06,
"loss": 0.9479,
"step": 810500
},
{
"epoch": 0.92,
"learning_rate": 4.023721189257978e-06,
"loss": 0.9383,
"step": 811000
},
{
"epoch": 0.92,
"learning_rate": 3.995365555163618e-06,
"loss": 0.9398,
"step": 811500
},
{
"epoch": 0.92,
"learning_rate": 3.967009921069257e-06,
"loss": 0.9452,
"step": 812000
},
{
"epoch": 0.92,
"learning_rate": 3.9386542869748964e-06,
"loss": 0.946,
"step": 812500
},
{
"epoch": 0.92,
"learning_rate": 3.910298652880536e-06,
"loss": 0.9398,
"step": 813000
},
{
"epoch": 0.92,
"learning_rate": 3.881999730054363e-06,
"loss": 0.9459,
"step": 813500
},
{
"epoch": 0.92,
"learning_rate": 3.853644095960003e-06,
"loss": 0.9498,
"step": 814000
},
{
"epoch": 0.92,
"learning_rate": 3.825288461865642e-06,
"loss": 0.9454,
"step": 814500
},
{
"epoch": 0.92,
"learning_rate": 3.7969328277712817e-06,
"loss": 0.9592,
"step": 815000
},
{
"epoch": 0.92,
"learning_rate": 3.7685771936769207e-06,
"loss": 0.9392,
"step": 815500
},
{
"epoch": 0.93,
"learning_rate": 3.74022155958256e-06,
"loss": 0.942,
"step": 816000
},
{
"epoch": 0.93,
"learning_rate": 3.7118659254881992e-06,
"loss": 0.9476,
"step": 816500
},
{
"epoch": 0.93,
"learning_rate": 3.6835102913938387e-06,
"loss": 0.9465,
"step": 817000
},
{
"epoch": 0.93,
"learning_rate": 3.6551546572994777e-06,
"loss": 0.9465,
"step": 817500
},
{
"epoch": 0.93,
"learning_rate": 3.626799023205117e-06,
"loss": 0.952,
"step": 818000
},
{
"epoch": 0.93,
"learning_rate": 3.5984433891107562e-06,
"loss": 0.9371,
"step": 818500
},
{
"epoch": 0.93,
"learning_rate": 3.570144466284584e-06,
"loss": 0.9422,
"step": 819000
},
{
"epoch": 0.93,
"learning_rate": 3.541788832190223e-06,
"loss": 0.9438,
"step": 819500
},
{
"epoch": 0.93,
"learning_rate": 3.5134331980958626e-06,
"loss": 0.937,
"step": 820000
},
{
"epoch": 0.93,
"learning_rate": 3.4850775640015016e-06,
"loss": 0.9507,
"step": 820500
},
{
"epoch": 0.93,
"learning_rate": 3.4567786411753295e-06,
"loss": 0.9434,
"step": 821000
},
{
"epoch": 0.93,
"learning_rate": 3.4284230070809694e-06,
"loss": 0.9368,
"step": 821500
},
{
"epoch": 0.93,
"learning_rate": 3.400067372986608e-06,
"loss": 0.9519,
"step": 822000
},
{
"epoch": 0.93,
"learning_rate": 3.371711738892248e-06,
"loss": 0.9402,
"step": 822500
},
{
"epoch": 0.93,
"learning_rate": 3.3433561047978865e-06,
"loss": 0.9381,
"step": 823000
},
{
"epoch": 0.93,
"learning_rate": 3.3150571819717147e-06,
"loss": 0.9404,
"step": 823500
},
{
"epoch": 0.93,
"learning_rate": 3.2867582591455426e-06,
"loss": 0.9551,
"step": 824000
},
{
"epoch": 0.94,
"learning_rate": 3.2584026250511825e-06,
"loss": 0.9476,
"step": 824500
},
{
"epoch": 0.94,
"learning_rate": 3.230046990956821e-06,
"loss": 0.941,
"step": 825000
},
{
"epoch": 0.94,
"learning_rate": 3.201691356862461e-06,
"loss": 0.9441,
"step": 825500
},
{
"epoch": 0.94,
"learning_rate": 3.1733357227680996e-06,
"loss": 0.9496,
"step": 826000
},
{
"epoch": 0.94,
"learning_rate": 3.1449800886737395e-06,
"loss": 0.9427,
"step": 826500
},
{
"epoch": 0.94,
"learning_rate": 3.116624454579378e-06,
"loss": 0.9489,
"step": 827000
},
{
"epoch": 0.94,
"learning_rate": 3.0882688204850175e-06,
"loss": 0.9433,
"step": 827500
},
{
"epoch": 0.94,
"learning_rate": 3.0599698976588454e-06,
"loss": 0.9487,
"step": 828000
},
{
"epoch": 0.94,
"learning_rate": 3.031614263564485e-06,
"loss": 0.9404,
"step": 828500
},
{
"epoch": 0.94,
"learning_rate": 3.003258629470124e-06,
"loss": 0.9361,
"step": 829000
},
{
"epoch": 0.94,
"learning_rate": 2.975016417912141e-06,
"loss": 0.9489,
"step": 829500
},
{
"epoch": 0.94,
"learning_rate": 2.94666078381778e-06,
"loss": 0.9455,
"step": 830000
},
{
"epoch": 0.94,
"learning_rate": 2.9183051497234195e-06,
"loss": 0.9409,
"step": 830500
},
{
"epoch": 0.94,
"learning_rate": 2.8899495156290585e-06,
"loss": 0.9396,
"step": 831000
},
{
"epoch": 0.94,
"learning_rate": 2.861593881534698e-06,
"loss": 0.9349,
"step": 831500
},
{
"epoch": 0.94,
"learning_rate": 2.833238247440337e-06,
"loss": 0.9412,
"step": 832000
},
{
"epoch": 0.94,
"learning_rate": 2.804939324614165e-06,
"loss": 0.9498,
"step": 832500
},
{
"epoch": 0.94,
"learning_rate": 2.776583690519804e-06,
"loss": 0.94,
"step": 833000
},
{
"epoch": 0.95,
"learning_rate": 2.7482280564254434e-06,
"loss": 0.9424,
"step": 833500
},
{
"epoch": 0.95,
"learning_rate": 2.719872422331083e-06,
"loss": 0.9448,
"step": 834000
},
{
"epoch": 0.95,
"learning_rate": 2.691516788236722e-06,
"loss": 0.9464,
"step": 834500
},
{
"epoch": 0.95,
"learning_rate": 2.6631611541423613e-06,
"loss": 0.9364,
"step": 835000
},
{
"epoch": 0.95,
"learning_rate": 2.6348055200480004e-06,
"loss": 0.942,
"step": 835500
},
{
"epoch": 0.95,
"learning_rate": 2.60644988595364e-06,
"loss": 0.9414,
"step": 836000
},
{
"epoch": 0.95,
"learning_rate": 2.578094251859279e-06,
"loss": 0.9394,
"step": 836500
},
{
"epoch": 0.95,
"learning_rate": 2.5497386177649183e-06,
"loss": 0.9384,
"step": 837000
},
{
"epoch": 0.95,
"learning_rate": 2.5213829836705573e-06,
"loss": 0.9438,
"step": 837500
},
{
"epoch": 0.95,
"learning_rate": 2.493027349576197e-06,
"loss": 0.9328,
"step": 838000
},
{
"epoch": 0.95,
"learning_rate": 2.464671715481836e-06,
"loss": 0.9497,
"step": 838500
},
{
"epoch": 0.95,
"learning_rate": 2.436372792655664e-06,
"loss": 0.9486,
"step": 839000
},
{
"epoch": 0.95,
"learning_rate": 2.408017158561303e-06,
"loss": 0.9407,
"step": 839500
},
{
"epoch": 0.95,
"learning_rate": 2.3796615244669426e-06,
"loss": 0.9438,
"step": 840000
},
{
"epoch": 0.95,
"learning_rate": 2.3513058903725817e-06,
"loss": 0.9432,
"step": 840500
},
{
"epoch": 0.95,
"learning_rate": 2.322950256278221e-06,
"loss": 0.9379,
"step": 841000
},
{
"epoch": 0.95,
"learning_rate": 2.29459462218386e-06,
"loss": 0.9375,
"step": 841500
},
{
"epoch": 0.96,
"learning_rate": 2.2662389880894996e-06,
"loss": 0.9429,
"step": 842000
},
{
"epoch": 0.96,
"learning_rate": 2.2378833539951386e-06,
"loss": 0.9416,
"step": 842500
},
{
"epoch": 0.96,
"learning_rate": 2.209527719900778e-06,
"loss": 0.9437,
"step": 843000
},
{
"epoch": 0.96,
"learning_rate": 2.181228797074606e-06,
"loss": 0.9402,
"step": 843500
},
{
"epoch": 0.96,
"learning_rate": 2.1528731629802454e-06,
"loss": 0.9447,
"step": 844000
},
{
"epoch": 0.96,
"learning_rate": 2.1245175288858844e-06,
"loss": 0.9389,
"step": 844500
},
{
"epoch": 0.96,
"learning_rate": 2.0962186060597127e-06,
"loss": 0.9361,
"step": 845000
},
{
"epoch": 0.96,
"learning_rate": 2.0678629719653518e-06,
"loss": 0.9518,
"step": 845500
},
{
"epoch": 0.96,
"learning_rate": 2.0395073378709912e-06,
"loss": 0.9421,
"step": 846000
},
{
"epoch": 0.96,
"learning_rate": 2.0111517037766303e-06,
"loss": 0.943,
"step": 846500
},
{
"epoch": 0.96,
"learning_rate": 1.9827960696822697e-06,
"loss": 0.9367,
"step": 847000
},
{
"epoch": 0.96,
"learning_rate": 1.9544404355879087e-06,
"loss": 0.9387,
"step": 847500
},
{
"epoch": 0.96,
"learning_rate": 1.926084801493548e-06,
"loss": 0.941,
"step": 848000
},
{
"epoch": 0.96,
"learning_rate": 1.8977291673991874e-06,
"loss": 0.9456,
"step": 848500
},
{
"epoch": 0.96,
"learning_rate": 1.869486955841204e-06,
"loss": 0.951,
"step": 849000
},
{
"epoch": 0.96,
"learning_rate": 1.8411313217468432e-06,
"loss": 0.9447,
"step": 849500
},
{
"epoch": 0.96,
"learning_rate": 1.8127756876524824e-06,
"loss": 0.9369,
"step": 850000
},
{
"epoch": 0.96,
"learning_rate": 1.7844200535581217e-06,
"loss": 0.9509,
"step": 850500
},
{
"epoch": 0.97,
"learning_rate": 1.756064419463761e-06,
"loss": 0.9332,
"step": 851000
},
{
"epoch": 0.97,
"learning_rate": 1.7277087853694002e-06,
"loss": 0.9406,
"step": 851500
},
{
"epoch": 0.97,
"learning_rate": 1.6993531512750394e-06,
"loss": 0.9376,
"step": 852000
},
{
"epoch": 0.97,
"learning_rate": 1.6710542284488675e-06,
"loss": 0.9314,
"step": 852500
},
{
"epoch": 0.97,
"learning_rate": 1.6426985943545067e-06,
"loss": 0.9424,
"step": 853000
},
{
"epoch": 0.97,
"learning_rate": 1.614342960260146e-06,
"loss": 0.9371,
"step": 853500
},
{
"epoch": 0.97,
"learning_rate": 1.5859873261657852e-06,
"loss": 0.9481,
"step": 854000
},
{
"epoch": 0.97,
"learning_rate": 1.5576316920714245e-06,
"loss": 0.9351,
"step": 854500
},
{
"epoch": 0.97,
"learning_rate": 1.5292760579770637e-06,
"loss": 0.9399,
"step": 855000
},
{
"epoch": 0.97,
"learning_rate": 1.500920423882703e-06,
"loss": 0.9474,
"step": 855500
},
{
"epoch": 0.97,
"learning_rate": 1.472621501056531e-06,
"loss": 0.9376,
"step": 856000
},
{
"epoch": 0.97,
"learning_rate": 1.4442658669621703e-06,
"loss": 0.9332,
"step": 856500
},
{
"epoch": 0.97,
"learning_rate": 1.4159102328678095e-06,
"loss": 0.9468,
"step": 857000
},
{
"epoch": 0.97,
"learning_rate": 1.3875545987734488e-06,
"loss": 0.9452,
"step": 857500
},
{
"epoch": 0.97,
"learning_rate": 1.3592556759472766e-06,
"loss": 0.9416,
"step": 858000
},
{
"epoch": 0.97,
"learning_rate": 1.3309000418529159e-06,
"loss": 0.9406,
"step": 858500
},
{
"epoch": 0.97,
"learning_rate": 1.3025444077585551e-06,
"loss": 0.9367,
"step": 859000
},
{
"epoch": 0.97,
"learning_rate": 1.2741887736641944e-06,
"loss": 0.9474,
"step": 859500
},
{
"epoch": 0.98,
"learning_rate": 1.2458331395698336e-06,
"loss": 0.9392,
"step": 860000
},
{
"epoch": 0.98,
"learning_rate": 1.2174775054754729e-06,
"loss": 0.9337,
"step": 860500
},
{
"epoch": 0.98,
"learning_rate": 1.1891218713811123e-06,
"loss": 0.9317,
"step": 861000
},
{
"epoch": 0.98,
"learning_rate": 1.1608229485549402e-06,
"loss": 0.9367,
"step": 861500
},
{
"epoch": 0.98,
"learning_rate": 1.1324673144605794e-06,
"loss": 0.9523,
"step": 862000
},
{
"epoch": 0.98,
"learning_rate": 1.1041116803662187e-06,
"loss": 0.9487,
"step": 862500
},
{
"epoch": 0.98,
"learning_rate": 1.0757560462718581e-06,
"loss": 0.942,
"step": 863000
},
{
"epoch": 0.98,
"learning_rate": 1.0474004121774974e-06,
"loss": 0.9439,
"step": 863500
},
{
"epoch": 0.98,
"learning_rate": 1.0190447780831366e-06,
"loss": 0.9339,
"step": 864000
},
{
"epoch": 0.98,
"learning_rate": 9.907458552569645e-07,
"loss": 0.9431,
"step": 864500
},
{
"epoch": 0.98,
"learning_rate": 9.623902211626037e-07,
"loss": 0.9384,
"step": 865000
},
{
"epoch": 0.98,
"learning_rate": 9.34034587068243e-07,
"loss": 0.9471,
"step": 865500
},
{
"epoch": 0.98,
"learning_rate": 9.056789529738822e-07,
"loss": 0.9422,
"step": 866000
},
{
"epoch": 0.98,
"learning_rate": 8.773233188795215e-07,
"loss": 0.9384,
"step": 866500
},
{
"epoch": 0.98,
"learning_rate": 8.489676847851607e-07,
"loss": 0.9454,
"step": 867000
},
{
"epoch": 0.98,
"learning_rate": 8.206120506908e-07,
"loss": 0.9442,
"step": 867500
},
{
"epoch": 0.98,
"learning_rate": 7.922564165964392e-07,
"loss": 0.9336,
"step": 868000
},
{
"epoch": 0.99,
"learning_rate": 7.639007825020786e-07,
"loss": 0.9368,
"step": 868500
},
{
"epoch": 0.99,
"learning_rate": 7.356018596759065e-07,
"loss": 0.9461,
"step": 869000
},
{
"epoch": 0.99,
"learning_rate": 7.072462255815458e-07,
"loss": 0.9402,
"step": 869500
},
{
"epoch": 0.99,
"learning_rate": 6.78890591487185e-07,
"loss": 0.9414,
"step": 870000
},
{
"epoch": 0.99,
"learning_rate": 6.505349573928243e-07,
"loss": 0.9404,
"step": 870500
},
{
"epoch": 0.99,
"learning_rate": 6.221793232984635e-07,
"loss": 0.9533,
"step": 871000
},
{
"epoch": 0.99,
"learning_rate": 5.938236892041027e-07,
"loss": 0.9579,
"step": 871500
},
{
"epoch": 0.99,
"learning_rate": 5.655247663779307e-07,
"loss": 0.948,
"step": 872000
},
{
"epoch": 0.99,
"learning_rate": 5.3716913228357e-07,
"loss": 0.9422,
"step": 872500
},
{
"epoch": 0.99,
"learning_rate": 5.088134981892092e-07,
"loss": 0.9362,
"step": 873000
},
{
"epoch": 0.99,
"learning_rate": 4.804578640948485e-07,
"loss": 0.9432,
"step": 873500
},
{
"epoch": 0.99,
"learning_rate": 4.5210223000048775e-07,
"loss": 0.9325,
"step": 874000
},
{
"epoch": 0.99,
"learning_rate": 4.23746595906127e-07,
"loss": 0.9241,
"step": 874500
},
{
"epoch": 0.99,
"learning_rate": 3.9539096181176624e-07,
"loss": 0.9395,
"step": 875000
},
{
"epoch": 0.99,
"learning_rate": 3.670353277174055e-07,
"loss": 0.9564,
"step": 875500
},
{
"epoch": 0.99,
"learning_rate": 3.386796936230448e-07,
"loss": 0.9423,
"step": 876000
},
{
"epoch": 0.99,
"learning_rate": 3.103807707968727e-07,
"loss": 0.9459,
"step": 876500
},
{
"epoch": 0.99,
"learning_rate": 2.82025136702512e-07,
"loss": 0.9397,
"step": 877000
},
{
"epoch": 1.0,
"learning_rate": 2.5366950260815125e-07,
"loss": 0.9326,
"step": 877500
},
{
"epoch": 1.0,
"learning_rate": 2.253138685137905e-07,
"loss": 0.951,
"step": 878000
},
{
"epoch": 1.0,
"learning_rate": 1.9695823441942974e-07,
"loss": 0.9426,
"step": 878500
},
{
"epoch": 1.0,
"learning_rate": 1.686593115932577e-07,
"loss": 0.9356,
"step": 879000
},
{
"epoch": 1.0,
"learning_rate": 1.4030367749889696e-07,
"loss": 0.935,
"step": 879500
},
{
"epoch": 1.0,
"learning_rate": 1.1194804340453621e-07,
"loss": 0.9338,
"step": 880000
},
{
"epoch": 1.0,
"learning_rate": 8.36491205783642e-08,
"loss": 0.9346,
"step": 880500
},
{
"epoch": 1.0,
"learning_rate": 5.5293486484003454e-08,
"loss": 0.9402,
"step": 881000
},
{
"epoch": 1.0,
"learning_rate": 2.6937852389642708e-08,
"loss": 0.937,
"step": 881500
}
],
"max_steps": 881659,
"num_train_epochs": 1,
"total_flos": 3.713762155948923e+18,
"trial_name": null,
"trial_params": null
}