diff --git a/README.md b/README.md index 9562ca4..33661fb 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,17 @@ Examples are: > python main.py 1 2 4 ``` +## Testing +After modeling is complete, testing can be performed on the specified testing dataset. To test a model given specific configurations, do: +``` +> python main.py 1 2 4 --testing +``` +To test all configurations, simply run: +``` +> python main.py --testing +``` +For testing to be successful, an existing (and trained) model should exist in the `logs` directory with the same name as in the configuration file for the given configuration id. + ## Application Endpoint There is a simple RESTful endpoint that can be used to make predictions on a specified title. To use the endpoint the model has to be trained alreay. Once the network is trained and ready to make some predictions, run ``` diff --git a/main.py b/main.py index 5fb2939..81ee839 100644 --- a/main.py +++ b/main.py @@ -36,6 +36,7 @@ def main(): parser.add_argument('configs', metavar='C', type=int, nargs='*', help='Config number to use (can be multiple)') parser.add_argument('--application', action='store_true') + parser.add_argument('--testing', action='store_true') args = parser.parse_args() if args.application: conf_num = args.configs[0] if args.configs else 0 @@ -49,9 +50,12 @@ def main(): builder = ModelBuilder(config_file, sess) network_model = builder.build() - if config_file[USE_PRETRAINED_NET]: + if config_file[USE_PRETRAINED_NET] and not args.testing: network_model.train(USE_PRETRAINED_NET) - network_model.train() + if args.testing: + network_model.test() + else: + network_model.train() network_model.close_writers() tf.reset_default_graph() except Exception as e: diff --git a/model/model.py b/model/model.py index 773ad34..0b6a6f9 100644 --- a/model/model.py +++ b/model/model.py @@ -101,6 +101,9 @@ def __init__(self, config, session): self.error_sum = None self.recall_sum_validation = None self.f1_sum_validation = None + self.prec_sum_testing = 
None + self.recall_sum_testing = None + self.f1_sum_testing = None self.logging_dir = build_structure(config) self.checkpoints_dir = self.logging_dir + '/' + CHECKPOINTS_DIR + '/' + "models.ckpt" @@ -150,6 +153,27 @@ def predict(self, title, subreddit='UNK'): result.append(self.data.rev_users_dict[i]) return result + def test(self): + """ Tests the model using the testing set """ + print("Starting testing...") + + test_data, test_sub, test_labels = self.data.get_testing() + test_prec, test_err, test_recall, test_f1 = \ + self._session.run([self.prec_sum_testing, + self.error_sum, + self.recall_sum_testing, + self.f1_sum_testing], + {self.input: test_data, + self.subreddit_input: test_sub, + self.target: test_labels, + self.keep_prob: 1.0}) + + + print("Testing Precision: ", get_val_summary_tensor(test_prec)) + print("Testing Recall: ", get_val_summary_tensor(test_recall)) + print("Testing F1: ", get_val_summary_tensor(test_f1)) + print("Testing Error: ", get_val_summary_tensor(test_err)) + def validate(self): """ Validates the model and returns the final precision """ print("Starting validation...") @@ -283,16 +307,19 @@ def train_batch(self, pre_train_net=False): self._session.run(self.pre_train_op, {self.input: batch_input, self.subreddit_input: batch_sub, - self.sec_target: batch_label}) + self.sec_target: batch_label, + self.keep_prob: self.dropout_prob}) elif pre_train_net: self._session.run(self.pre_train_op, {self.input: batch_input, - self.sec_target: batch_label}) + self.sec_target: batch_label, + self.keep_prob: self.dropout_prob}) else: self._session.run(self.train_op, {self.input: batch_input, self.subreddit_input: batch_sub, - self.target: batch_label}) + self.target: batch_label, + self.keep_prob: self.dropout_prob}) def close_writers(self): """ Close tensorboard writers """ diff --git a/model/model_builder.py b/model/model_builder.py index 0a62bc2..98fb212 100644 --- a/model/model_builder.py +++ b/model/model_builder.py @@ -155,7 +155,7 @@ def 
add_layer(self, number_of_neurons): if self._model.use_dropout: self._model.latest_layer = \ tf.nn.dropout(self._model.latest_layer, - self._model.dropout_prob, + self._model.keep_prob, name="hidden_layer" + str(self.number_of_layers) + "dropout") return self @@ -241,6 +241,9 @@ def add_precision_operations(self): _, self._model.precision_training = \ tf.metrics.precision(self._model.target, self._model.predictions) + _, self._model.precision_testing = \ + tf.metrics.precision(self._model.target, + self._model.predictions) # Calculate recall _, self._model.recall_validation = \ tf.metrics.recall(self._model.target, @@ -248,6 +251,9 @@ def add_precision_operations(self): _, self._model.recall_training = \ tf.metrics.recall(self._model.target, self._model.predictions) + _, self._model.recall_testing = \ + tf.metrics.recall(self._model.target, + self._model.predictions) # Calculate F1-score: 2 * (prec * recall) / (prec + recall) self._model.f1_score_validation = \ @@ -274,6 +280,19 @@ def add_precision_operations(self): tf.zeros_like(self._model.f1_score_training), self._model.f1_score_training) + + self._model.f1_score_testing = \ + tf.multiply(2.0, + tf.truediv(tf.multiply(self._model.precision_testing, + self._model.recall_testing), + tf.add(self._model.precision_testing, + self._model.recall_testing))) + # Convert to 0 if f1 score is NaN + self._model.f1_score_testing = \ + tf.where(tf.is_nan(self._model.f1_score_testing), + tf.zeros_like(self._model.f1_score_testing), + self._model.f1_score_testing) + self._model.error_sum = \ tf.summary.scalar('cross_entropy', self._model.error) @@ -285,6 +304,9 @@ def add_precision_operations(self): self._model.prec_sum_training = \ tf.summary.scalar('precision_training', self._model.precision_training) + self._model.prec_sum_testing = \ + tf.summary.scalar('precision_testing', + self._model.precision_testing) self._model.recall_sum_validation = \ tf.summary.scalar('recall_validation', @@ -292,6 +314,9 @@ def 
add_precision_operations(self): self._model.recall_sum_training = \ tf.summary.scalar('recall_training', self._model.recall_training) + self._model.recall_sum_testing = \ + tf.summary.scalar('recall_testing', + self._model.recall_testing) self._model.f1_sum_validation = \ tf.summary.scalar('f1_score_validation', @@ -299,6 +324,9 @@ def add_precision_operations(self): self._model.f1_sum_training = \ tf.summary.scalar('f1_score_training', self._model.f1_score_training) + self._model.f1_sum_testing = \ + tf.summary.scalar('f1_score_testing', + self._model.f1_score_testing) return self diff --git a/model/util/data.py b/model/util/data.py index 22bbc17..0445436 100644 --- a/model/util/data.py +++ b/model/util/data.py @@ -174,7 +174,7 @@ def get_validation(self): """ Get the whole validation set in a vectorized form """ old_ind = self._current_valid_index self._current_valid_index = 0 - batch_x, batch_sub, batch_y = self.next_valid_batch() + batch_x, batch_sub, batch_y = self.next_valid_batch(self.validation_size) self._current_valid_index = old_ind return batch_x, batch_sub, batch_y @@ -224,7 +224,7 @@ def get_testing(self): return batch_x, batch_sub, batch_y def next_test_batch(self, batch_size=None): - """ Get the next batch of validation data """ + """ Get the next batch of test data """ batch_size = batch_size or self.batch_size batch_x = [] batch_sub = [] @@ -241,9 +241,10 @@ def next_test_batch(self, batch_size=None): self._current_test_index = 0 # Turn sentences and labels into vectors - sentence_vec = helper.get_indicies(sentence, - self.word_dict, - self.max_title_length) + sentence_vec, pres, absent = \ + helper.get_indicies(sentence, + self.word_dict, + self.max_title_length) subreddit_vec = helper.label_vector(subreddit, self.subreddit_dict,