Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ Examples are:
> python main.py 1 2 4
```

## Testing
After modeling is complete, testing can be performed on the specified testing dataset. To test a model given specific configurations, do:
```
> python main.py <config ids> --testing
```
To test all configurations, simply run:
```
> python main.py --testing
```
For testing to succeed, a trained model must already exist in the `logs` directory, with the same name as specified in the configuration file for the given configuration id.

## Application Endpoint
There is a simple RESTful endpoint that can be used to make predictions on a specified title. To use the endpoint, the model has to be trained already. Once the network is trained and ready to make some predictions, run
```
Expand Down
8 changes: 6 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def main():
parser.add_argument('configs', metavar='C', type=int, nargs='*',
help='Config number to use (can be multiple)')
parser.add_argument('--application', action='store_true')
parser.add_argument('--testing', action='store_true')
args = parser.parse_args()
if args.application:
conf_num = args.configs[0] if args.configs else 0
Expand All @@ -49,9 +50,12 @@ def main():
builder = ModelBuilder(config_file, sess)

network_model = builder.build()
if config_file[USE_PRETRAINED_NET]:
if config_file[USE_PRETRAINED_NET] and not args.testing:
network_model.train(USE_PRETRAINED_NET)
network_model.train()
if args.testing:
network_model.test()
else:
network_model.train()
network_model.close_writers()
tf.reset_default_graph()
except Exception as e:
Expand Down
33 changes: 30 additions & 3 deletions model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ def __init__(self, config, session):
self.error_sum = None
self.recall_sum_validation = None
self.f1_sum_validation = None
self.prec_sum_testing = None
self.recall_sum_testing = None
self.f1_sum_testing = None

self.logging_dir = build_structure(config)
self.checkpoints_dir = self.logging_dir + '/' + CHECKPOINTS_DIR + '/' + "models.ckpt"
Expand Down Expand Up @@ -150,6 +153,27 @@ def predict(self, title, subreddit='UNK'):
result.append(self.data.rev_users_dict[i])
return result

def test(self):
""" Tests the model using the testing set """
print("Starting testing...")

test_data, test_sub, test_labels = self.data.get_testing()
test_prec, test_err, test_recall, test_f1 = \
self._session.run([self.prec_sum_testing,
self.error_sum,
self.recall_sum_testing,
self.f1_sum_testing],
{self.input: test_data,
self.subreddit_input: test_sub,
self.target: test_labels,
self.keep_prob: 1.0})


print("Testing Precision: ", get_val_summary_tensor(test_prec))
print("Testing Recall: ", get_val_summary_tensor(test_recall))
print("Testing F1: ", get_val_summary_tensor(test_f1))
print("Testing Error: ", get_val_summary_tensor(test_err))

def validate(self):
""" Validates the model and returns the final precision """
print("Starting validation...")
Expand Down Expand Up @@ -283,16 +307,19 @@ def train_batch(self, pre_train_net=False):
self._session.run(self.pre_train_op,
{self.input: batch_input,
self.subreddit_input: batch_sub,
self.sec_target: batch_label})
self.sec_target: batch_label,
self.keep_prob: self.dropout_prob})
elif pre_train_net:
self._session.run(self.pre_train_op,
{self.input: batch_input,
self.sec_target: batch_label})
self.sec_target: batch_label,
self.keep_prob: self.dropout_prob})
else:
self._session.run(self.train_op,
{self.input: batch_input,
self.subreddit_input: batch_sub,
self.target: batch_label})
self.target: batch_label,
self.keep_prob: self.dropout_prob})

def close_writers(self):
""" Close tensorboard writers """
Expand Down
30 changes: 29 additions & 1 deletion model/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def add_layer(self, number_of_neurons):
if self._model.use_dropout:
self._model.latest_layer = \
tf.nn.dropout(self._model.latest_layer,
self._model.dropout_prob,
self._model.keep_prob,
name="hidden_layer" + str(self.number_of_layers) + "dropout")

return self
Expand Down Expand Up @@ -241,13 +241,19 @@ def add_precision_operations(self):
_, self._model.precision_training = \
tf.metrics.precision(self._model.target,
self._model.predictions)
_, self._model.precision_testing = \
tf.metrics.precision(self._model.target,
self._model.predictions)
# Calculate recall
_, self._model.recall_validation = \
tf.metrics.recall(self._model.target,
self._model.predictions)
_, self._model.recall_training = \
tf.metrics.recall(self._model.target,
self._model.predictions)
_, self._model.recall_testing = \
tf.metrics.recall(self._model.target,
self._model.predictions)

# Calculate F1-score: 2 * (prec * recall) / (prec + recall)
self._model.f1_score_validation = \
Expand All @@ -274,6 +280,19 @@ def add_precision_operations(self):
tf.zeros_like(self._model.f1_score_training),
self._model.f1_score_training)


self._model.f1_score_testing = \
tf.multiply(2.0,
tf.truediv(tf.multiply(self._model.precision_testing,
self._model.recall_testing),
tf.add(self._model.precision_testing,
self._model.recall_testing)))
# Convert to 0 if f1 score is NaN
self._model.f1_score_testing = \
tf.where(tf.is_nan(self._model.f1_score_testing),
tf.zeros_like(self._model.f1_score_testing),
self._model.f1_score_testing)

self._model.error_sum = \
tf.summary.scalar('cross_entropy', self._model.error)

Expand All @@ -285,20 +304,29 @@ def add_precision_operations(self):
self._model.prec_sum_training = \
tf.summary.scalar('precision_training',
self._model.precision_training)
self._model.prec_sum_testing = \
tf.summary.scalar('precision_testing',
self._model.precision_testing)

self._model.recall_sum_validation = \
tf.summary.scalar('recall_validation',
self._model.recall_validation)
self._model.recall_sum_training = \
tf.summary.scalar('recall_training',
self._model.recall_training)
self._model.recall_sum_testing = \
tf.summary.scalar('recall_testing',
self._model.recall_testing)

self._model.f1_sum_validation = \
tf.summary.scalar('f1_score_validation',
self._model.f1_score_validation)
self._model.f1_sum_training = \
tf.summary.scalar('f1_score_training',
self._model.f1_score_training)
self._model.f1_sum_testing = \
tf.summary.scalar('f1_score_testing',
self._model.f1_score_testing)

return self

Expand Down
11 changes: 6 additions & 5 deletions model/util/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def get_validation(self):
""" Get the whole validation set in a vectorized form """
old_ind = self._current_valid_index
self._current_valid_index = 0
batch_x, batch_sub, batch_y = self.next_valid_batch()
batch_x, batch_sub, batch_y = self.next_valid_batch(self.validation_size)
self._current_valid_index = old_ind
return batch_x, batch_sub, batch_y

Expand Down Expand Up @@ -224,7 +224,7 @@ def get_testing(self):
return batch_x, batch_sub, batch_y

def next_test_batch(self, batch_size=None):
""" Get the next batch of validation data """
""" Get the next batch of test data """
batch_size = batch_size or self.batch_size
batch_x = []
batch_sub = []
Expand All @@ -241,9 +241,10 @@ def next_test_batch(self, batch_size=None):
self._current_test_index = 0

# Turn sentences and labels into vectors
sentence_vec = helper.get_indicies(sentence,
self.word_dict,
self.max_title_length)
sentence_vec, pres, absent = \
helper.get_indicies(sentence,
self.word_dict,
self.max_title_length)

subreddit_vec = helper.label_vector(subreddit,
self.subreddit_dict,
Expand Down