From 26a90f04d79b4623d1a32d19f3d79853870c2479 Mon Sep 17 00:00:00 2001 From: gbatmaz <50459436+gbatmaz@users.noreply.github.com> Date: Mon, 12 Sep 2022 14:51:46 +0100 Subject: [PATCH 1/4] Update autoencoder.py add early stop --- dfencoder/autoencoder.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/dfencoder/autoencoder.py b/dfencoder/autoencoder.py index 75a02bd..38aacbc 100644 --- a/dfencoder/autoencoder.py +++ b/dfencoder/autoencoder.py @@ -658,7 +658,7 @@ def _create_stat_dict(self, a): std = scaler.std return {'scaler': scaler, 'mean': mean, 'std': std} - def fit(self, df, epochs=1, val=None): + def fit(self, df, epochs=1, val=None, patience=5): """Does training.""" pdf = df.copy() # if val is None: @@ -687,6 +687,9 @@ def fit(self, df, epochs=1, val=None): n_updates = len(df) // self.batch_size if len(df) % self.batch_size > 0: n_updates += 1 + last_loss = 5000 + + count_es = 0 for i in range(epochs): self.train() if self.verbose: @@ -724,6 +727,26 @@ def fit(self, df, epochs=1, val=None): num, bin, cat = self.forward(slc_out_tensor) _, _, _, net_loss = self.compute_loss(num, bin, cat, slc_out, _id=True) id_loss.append(net_loss) + + #Earlystopping + current_net_loss = net_loss + print('The Current Net Loss:', current_net_loss) + + if current_net_loss > last_loss: + count_es += 1 + print('Early stop count:', count_es) + + if count_es >= patience: + print('Early stopping!\n') + break + + else: + print('set count for earlystop: 0') + count_es = 0 + + last_loss = current_net_loss + + self.logger.end_epoch() # if self.project_embeddings: @@ -740,6 +763,7 @@ def fit(self, df, epochs=1, val=None): msg += 'net validation loss, unaltered input: \n' msg += f"{round(id_loss, 4)} \n\n\n" print(msg) + #Getting training loss statistics # mse_loss, bce_loss, cce_loss, _ = self.get_anomaly_score(pdf) if pdf_val is None else self.get_anomaly_score(pd.concat([pdf, pdf_val])) @@ -1041,4 +1065,4 @@ def get_results(self, df, return_abs = False): result_cols.append('max_abs_z') result_cols.append('mean_abs_z') return pdf[result_cols] - \ No newline at end of file + From 91815265e05e4e789cbd7f77df6bd17799afd8a8 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Mon, 10 Oct 2022 16:00:54 +0000 Subject: [PATCH 2/4] print when early stopping --- dfencoder/autoencoder.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dfencoder/autoencoder.py b/dfencoder/autoencoder.py index cec0ec6..fb76944 100644 --- a/dfencoder/autoencoder.py +++ b/dfencoder/autoencoder.py @@ -692,7 +692,7 @@ def fit(self, df, epochs=1, val=None, patience=5): _, _, _, net_loss = self.compute_loss(num, bin, cat, slc_out, _id=True) id_loss.append(net_loss) - #Earlystopping + # Early stopping current_net_loss = net_loss if self.verbose: print('The Current Net Loss:', current_net_loss) @@ -703,13 +703,12 @@ def fit(self, df, epochs=1, val=None, patience=5): print('Early stop count:', count_es) if count_es >= patience: - if self.verbose: - print('Early stopping!\n') + print('Early stopping: early stop count({}) >= patience({})'.format(count_es, patience)) break else: if self.verbose: - print('set count for earlystop: 0') + print('Set count for earlystop: 0') count_es = 0 self.logger.end_epoch() From c6570825035efbd20af57f6961b71ed61e9ecf88 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Wed, 16 Nov 2022 16:35:56 +0000 Subject: [PATCH 3/4] move patience arg to constructor --- dfencoder/autoencoder.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dfencoder/autoencoder.py b/dfencoder/autoencoder.py index fb76944..82a06e6 100644 --- a/dfencoder/autoencoder.py +++ b/dfencoder/autoencoder.py @@ -165,6 +165,7 @@ def __init__(self, progress_bar=True, n_megabatches=1, scaler='standard', + patience=5, *args, **kwargs): super(AutoEncoder, self).__init__(*args, **kwargs) @@ -224,6 +225,8 @@ def __init__(self, self.project_embeddings = project_embeddings self.scaler = scaler + + self.patience = patience self.n_megabatches = n_megabatches @@ -622,7 +625,7 @@ def _create_stat_dict(self, a): std = scaler.std return {'scaler': scaler, 'mean': mean, 'std': std} - def fit(self, df, epochs=1, val=None, patience=5): + def fit(self, df, epochs=1, val=None): """Does training.""" pdf = df.copy() # if val is None: @@ -702,8 +705,9 @@ def fit(self, df, epochs=1, val=None, patience=5): if self.verbose: print('Early stop count:', count_es) - if count_es >= patience: - print('Early stopping: early stop count({}) >= patience({})'.format(count_es, patience)) + if count_es >= self.patience: + if self.verbose: + print('Early stopping: early stop count({}) >= patience({})'.format(count_es, self.patience)) break else: From 9da3bbdd610308234067bf0a4854e68369c9a76d Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Wed, 30 Nov 2022 21:49:23 +0000 Subject: [PATCH 4/4] update last_loss in early stop --- dfencoder/autoencoder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dfencoder/autoencoder.py b/dfencoder/autoencoder.py index 9e7510b..c97bd40 100644 --- a/dfencoder/autoencoder.py +++ b/dfencoder/autoencoder.py @@ -720,6 +720,8 @@ def fit(self, df, epochs=1, val=None): print('Set count for earlystop: 0') count_es = 0 + last_loss = current_net_loss + self.logger.end_epoch() # if self.project_embeddings: # self.logger.show_embeddings(self.categorical_fts)