From ff5fabcbe64c58ec02047b2c1cecb97f651afca8 Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Thu, 15 Dec 2016 17:20:33 +0300
Subject: [PATCH 1/7] - fix requirements - fix paths - add how to run
 instruction

---
 README.md            | 22 ++++++++++++++++++----
 requirements.txt     |  7 +++++++
 seq2seq/translate.py |  8 +++++---
 3 files changed, 30 insertions(+), 7 deletions(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index b9b7069..6facaca 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,24 @@
 Set of (Tensorflow) implementations which generate comments from code. Thesis for the B.sc. AI. 
 
 # How to execute:
-## Seq2seq: 
-- Enter tf: source ~/tensorflow/bin/activate
-- Execute code: python translate.py --size=256 --num_layers=3 --steps_per_checkpoint=50 --bleu
-- Or interactive mode (only works when the model has been trained): python translate.py --size=350 --num_layers=3 --step_per_checkpoint=50 --decode
+## Seq2seq:
+- create/activate virtualenv
+```bash
+source ~/tensorflow/bin/activate
+```
+- install requirements
+```bash
+pip install -r requirements.txt
+```
+- Execute code:
+```bash
+python translate.py --size=256 --num_layers=3 --steps_per_checkpoint=50 --bleu
+```
+
+- Or interactive mode (only works when the model has been trained): 
+```bash
+python translate.py --size=350 --num_layers=3 --step_per_checkpoint=50 --decode
+```
 
 ### Options
 - add --evaluate to see the score with a trained model on the development file (default False)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..22db177
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+funcsigs==1.0.2
+mock==2.0.0
+numpy==1.11.2
+pbr==1.10.0
+protobuf==3.1.0
+six==1.10.0
+tensorflow==0.12.0rc1
diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index ee42e6f..0cde51a 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -52,7 +52,9 @@
 from evaluation.meteor.meteor import Meteor
 
 import warnings
-warnings.filterwarnings("ignore", category=DeprecationWarning) 
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+DATA_PATH = os.path.dirname(os.path.abspath(__file__))
 
 
 tf.app.flags.DEFINE_float("learning_rate", 0.5, "Learning rate.")
@@ -66,8 +68,8 @@
 tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
 tf.app.flags.DEFINE_integer("code_vocab_size", 100000, "Program vocabulary size.")
 tf.app.flags.DEFINE_integer("en_vocab_size", 100000, "English vocabulary size.")
-tf.app.flags.DEFINE_string("data_dir", "/home/tjalling/Desktop/thesis/tensorflow/implementations/seq2seq/data/", "Data directory")
-tf.app.flags.DEFINE_string("train_dir", "/home/tjalling/Desktop/thesis/tensorflow/implementations/seq2seq/train/", "Training directory.")
+tf.app.flags.DEFINE_string("data_dir", os.path.join(DATA_PATH, "data/"), "Data directory")
+tf.app.flags.DEFINE_string("train_dir", os.path.join(DATA_PATH, "data/"), "Training directory.")
 tf.app.flags.DEFINE_string("dataset", "allCode", "Specify the name of which dataset to use.")
 tf.app.flags.DEFINE_string("dev_files", "dev/10pt.random", "The file path to the English dev file, relative from the data_dir.")
 tf.app.flags.DEFINE_string("translated_dev_code", "dev/translated.en", "The dev file with Code translated into English.")

From c6239cd6087c558623aae762acea89fbc96219cf Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Fri, 16 Dec 2016 20:58:12 +0300
Subject: [PATCH 2/7] Add logging

---
 seq2seq/translate.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index 0cde51a..a2675b4 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -52,6 +52,11 @@
 from evaluation.meteor.meteor import Meteor
 
 import warnings
+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
+                    datefmt='%m-%d %H:%M',)
+logger = logging.getLogger(__file__)
+
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 DATA_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -181,7 +186,7 @@ def translate_file(source_path=dev_code_file, target_path=translated_dev_code):
                     # Get output logits for the sentence.
                     _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                                                     target_weights, bucket_id, True)
-                                                                    
+
                                                                     
                     # This is a greedy decoder - outputs are just argmaxes of output_logits.
                     outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
@@ -415,14 +420,16 @@ def self_test():
 
 
 def main(_):
+    logger.info("start")
     if FLAGS.self_test:
         self_test()
     elif FLAGS.decode:
         decode()
-    elif FLAGS.evaluate:  
+    elif FLAGS.evaluate:
         evaluate()
     else:
         train()
+    logger.info("stop")
 
 if __name__ == "__main__":
     tf.app.run()

From 0d8cc22c8cfbe7216c3447b9e600c1021f3942ac Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Sat, 24 Dec 2016 18:49:27 +0300
Subject: [PATCH 3/7] Null and out of range checks Deprecated methods Readme
 update

---
 README.md                               |  1 +
 dataset_generation/getDocStrings.py     | 12 ++++++------
 ptr/main.py                             |  2 +-
 seq2seq/evaluation/bleu/multi-bleu.perl |  9 ++++++---
 seq2seq/seq2seq_model.py                |  2 +-
 seq2seq/translate.py                    |  8 ++++----
 6 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 6facaca..cc747fe 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ source ~/tensorflow/bin/activate
 ```
 - install requirements
 ```bash
+pip install --upgrade pip
 pip install -r requirements.txt
 ```
 - Execute code:
diff --git a/dataset_generation/getDocStrings.py b/dataset_generation/getDocStrings.py
index c08c593..34e50dc 100644
--- a/dataset_generation/getDocStrings.py
+++ b/dataset_generation/getDocStrings.py
@@ -1,6 +1,6 @@
 from os.path import basename, splitext
 import sys
-import util 
+import util
 
 
 commentList = ["# ", "#!"]
@@ -83,7 +83,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
     # loop through all the lines in the source, get the comment 
     # and the corresponding code
     with open(commentFile, "a") as commentF:
-        with open(codeFile, "a") as codeF:      
+        with open(codeFile, "a") as codeF:
             for i in xrange(startLine, len(source)):
                 # print "i in comment loop is:" , i
                 globalI = i
@@ -117,10 +117,10 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
 
                     # first if we are at another indentation level, we found an deeper
                     # docstring, thus exit
-                    if currIndent != indentation or not inComment: 
+                    if currIndent != indentation or not inComment:
                         # print ">>>It is a new comment, return error"
                         return(i,False)
-                    
+
                     # otherwise end the comment
                     else:
                         # print ">>>Closed comment"
@@ -153,7 +153,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
                     commentF.write(util.cleanComment(comment) + "\n!@#$%!@#$%!@#$%!@#$%!@#$%")
 
                     return(i, True)
-                
+
                 # if we are still here, add the current line to the code
                 code.append(line.strip())
 
@@ -200,6 +200,6 @@ def isDef(source, startLine, i):
 
 if __name__ == '__main__':
     import sys
-    
+
     with open(sys.argv[1]) as fp:
         make_pairs(fp)
\ No newline at end of file
diff --git a/ptr/main.py b/ptr/main.py
index a8210f6..5494aea 100644
--- a/ptr/main.py
+++ b/ptr/main.py
@@ -133,7 +133,7 @@ def step(self):
         with tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) as sess:
             merged = tf.merge_all_summaries()
             writer = tf.train.SummaryWriter("/tmp/pointer_logs", sess.graph)
-            init = tf.initialize_all_variables()
+            init = tf.global_variables_initializer()
             sess.run(init)
             for i in range(10000):
                 encoder_input_data, decoder_input_data, targets_data = dataset.next_batch(
diff --git a/seq2seq/evaluation/bleu/multi-bleu.perl b/seq2seq/evaluation/bleu/multi-bleu.perl
index 8a1871a..778efba 100644
--- a/seq2seq/evaluation/bleu/multi-bleu.perl
+++ b/seq2seq/evaluation/bleu/multi-bleu.perl
@@ -149,9 +149,12 @@ sub add_to_ref {
   printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
   exit(1);
 }
-
-if ($length_translation<$length_reference) {
-  $brevity_penalty = exp(1-$length_reference/$length_translation);
+if($length_translation==0){
+  $brevity_penalty = exp(0);
+}else{
+ if ($length_translation<$length_reference) {
+   $brevity_penalty = exp(1-$length_reference/$length_translation);
+ }
 }
 $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
 				my_log( $bleu[2] ) +
diff --git a/seq2seq/seq2seq_model.py b/seq2seq/seq2seq_model.py
index 12a400e..1809110 100644
--- a/seq2seq/seq2seq_model.py
+++ b/seq2seq/seq2seq_model.py
@@ -175,7 +175,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
         self.updates.append(opt.apply_gradients(
             zip(clipped_gradients, params), global_step=self.global_step))
 
-    self.saver = tf.train.Saver(tf.all_variables())
+    self.saver = tf.train.Saver(tf.global_variables())
 
   def step(self, session, encoder_inputs, decoder_inputs, target_weights,
            bucket_id, forward_only):
diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index a2675b4..64d4812 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -196,7 +196,7 @@ def translate_file(source_path=dev_code_file, target_path=translated_dev_code):
                         outputs = outputs[:outputs.index(data_utils.EOS_ID)]
                         
                     # Write translated sentence to translation file.
-                    translated_file.write(" ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs]) + "\n")
+                    translated_file.write(" ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs if output in rev_en_vocab]) + "\n")
                     
                     # print ("> %s" % sentence)
                     # print(" ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs]))
@@ -224,7 +224,7 @@ def create_model(session, forward_only):
     model.saver.restore(session, ckpt.model_checkpoint_path)
   else:
     print("Created model with fresh parameters.")
-    session.run(tf.initialize_all_variables())
+    session.run(tf.global_variables_initializer())
   return model
 
 
@@ -393,7 +393,7 @@ def decode():
             if data_utils.EOS_ID in outputs:
                 outputs = outputs[:outputs.index(data_utils.EOS_ID)]
             # Print out French sentence corresponding to outputs.
-            print(" ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs]))
+            print(" ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs if output in rev_en_vocab]))
             print("> ", end="")
             sys.stdout.flush()
             sentence = sys.stdin.readline()
@@ -406,7 +406,7 @@ def self_test():
     # Create model with vocabularies of 10, 2 small buckets, 2 layers of 32.
     model = seq2seq_model.Seq2SeqModel(10, 10, [(3, 3), (6, 6)], 32, 2,
                                        5.0, 32, 0.3, 0.99, num_samples=8)
-    sess.run(tf.initialize_all_variables())
+    sess.run(tf.global_variables_initializer())
 
     # Fake data set for both the (3, 3) and (6, 6) bucket.
     data_set = ([([1, 1], [2, 2]), ([3, 3], [4]), ([5], [6])],

From 79e4d952974be791769b68078030a657b72c0603 Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Sat, 24 Dec 2016 19:14:42 +0300
Subject: [PATCH 4/7] Mode logging and running instructions

---
 .idea/misc.xml       |  4 ++++
 README.md            | 16 ++++++++++------
 seq2seq/translate.py |  8 ++++++--
 3 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 .idea/misc.xml

diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..b30885f
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.10 virtualenv at ~/up/code_comments/code-to-comment/.code_to_comment (1)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/README.md b/README.md
index cc747fe..e20f674 100644
--- a/README.md
+++ b/README.md
@@ -3,23 +3,27 @@ Set of (Tensorflow) implementations which generate comments from code. Thesis fo
 
 # How to execute:
 ## Seq2seq:
-- create/activate virtualenv
+- Create/activate virtualenv
 ```bash
 source ~/tensorflow/bin/activate
 ```
-- install requirements
+- Install requirements
 ```bash
 pip install --upgrade pip
 pip install -r requirements.txt
 ```
-- Execute code:
+- Run training. Note: the training process runs in an infinite loop:
 ```bash
-python translate.py --size=256 --num_layers=3 --steps_per_checkpoint=50 --bleu
+python translate.py --num_layers=3
+```
+- Run evaluation (only works when the model has been trained): 
+```bash
+python translate.py --num_layers=3 --evaluate
 ```
 
-- Or interactive mode (only works when the model has been trained): 
+- Run interactive translation mode (only works when the model has been trained): 
 ```bash
-python translate.py --size=350 --num_layers=3 --step_per_checkpoint=50 --decode
+python translate.py --num_layers=3 --steps_per_checkpoint=50 --decode
 ```
 
 ### Options
diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index 64d4812..2c4c840 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -420,16 +420,20 @@ def self_test():
 
 
 def main(_):
-    logger.info("start")
+    logger.info("Start")
     if FLAGS.self_test:
+        logger.info("Self testing")
         self_test()
     elif FLAGS.decode:
+        logger.info("Decoding")
         decode()
     elif FLAGS.evaluate:
+        logger.info("Evaluating")
         evaluate()
     else:
+        logger.info("Training")
         train()
-    logger.info("stop")
+    logger.info("Stop")
 
 if __name__ == "__main__":
     tf.app.run()

From c9ff532b62de8a04d92f73ca55d036c7dd88a564 Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Sat, 24 Dec 2016 19:24:59 +0300
Subject: [PATCH 5/7] Refactored flags for clarity

---
 seq2seq/translate.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index 2c4c840..b3174eb 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -61,6 +61,21 @@
 
 DATA_PATH = os.path.dirname(os.path.abspath(__file__))
 
+tf.app.flags.DEFINE_boolean("decode", False,
+                            "Set to True for interactive decoding.")
+tf.app.flags.DEFINE_boolean("evaluate", False,
+                            "Run evaluation metrics on the output.")
+tf.app.flags.DEFINE_boolean("self_test", False,
+                            "Run a self-test if this is set to True.")
+
+tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
+tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200,
+                            "How many training steps to do per checkpoint.")
+
+tf.app.flags.DEFINE_string("data_dir", os.path.join(DATA_PATH, "data/"), "Data directory")
+tf.app.flags.DEFINE_string("train_dir", os.path.join(DATA_PATH, "data/"), "Training directory.")
+tf.app.flags.DEFINE_string("dataset", "allCode", "Specify the name of which dataset to use.")
+tf.app.flags.DEFINE_string("dev_files", "dev/10pt.random", "The file path to the English dev file, relative from the data_dir.")
 
 tf.app.flags.DEFINE_float("learning_rate", 0.5, "Learning rate.")
 tf.app.flags.DEFINE_float("learning_rate_decay_factor", 0.99,
@@ -70,24 +85,12 @@
 tf.app.flags.DEFINE_integer("batch_size", 64,
                             "Batch size to use during training.")
 tf.app.flags.DEFINE_integer("size", 256, "Size of each model layer.")
-tf.app.flags.DEFINE_integer("num_layers", 1, "Number of layers in the model.")
 tf.app.flags.DEFINE_integer("code_vocab_size", 100000, "Program vocabulary size.")
 tf.app.flags.DEFINE_integer("en_vocab_size", 100000, "English vocabulary size.")
-tf.app.flags.DEFINE_string("data_dir", os.path.join(DATA_PATH, "data/"), "Data directory")
-tf.app.flags.DEFINE_string("train_dir", os.path.join(DATA_PATH, "data/"), "Training directory.")
-tf.app.flags.DEFINE_string("dataset", "allCode", "Specify the name of which dataset to use.")
-tf.app.flags.DEFINE_string("dev_files", "dev/10pt.random", "The file path to the English dev file, relative from the data_dir.")
 tf.app.flags.DEFINE_string("translated_dev_code", "dev/translated.en", "The dev file with Code translated into English.")
 tf.app.flags.DEFINE_integer("max_train_data_size", 0,
                             "Limit on the size of training data (0: no limit).")
-tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200,
-                            "How many training steps to do per checkpoint.")
-tf.app.flags.DEFINE_boolean("decode", False,
-                            "Set to True for interactive decoding.")
-tf.app.flags.DEFINE_boolean("self_test", False,
-                            "Run a self-test if this is set to True.")
-tf.app.flags.DEFINE_boolean("evaluate", False, 
-                            "Run evaluation metrics on the output.")
+
 
 FLAGS = tf.app.flags.FLAGS
 data_dir = FLAGS.data_dir  + FLAGS.dataset + "/"

From adec52f301cf17fa753d608d7095b88be360339d Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Sun, 25 Dec 2016 13:22:45 +0300
Subject: [PATCH 6/7] Restore path check

---
 seq2seq/translate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/seq2seq/translate.py b/seq2seq/translate.py
index b3174eb..7fd53d1 100644
--- a/seq2seq/translate.py
+++ b/seq2seq/translate.py
@@ -222,7 +222,7 @@ def create_model(session, forward_only):
       FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
       forward_only=forward_only)
   ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
-  if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
+  if ckpt and ckpt.model_checkpoint_path:
     print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
     model.saver.restore(session, ckpt.model_checkpoint_path)
   else:

From e5497c5b7b6f362e0ff8205dd1eae5c4ed3ddcf4 Mon Sep 17 00:00:00 2001
From: Yuriy Vaskin <vaskin90@gmail.com>
Date: Sun, 25 Dec 2016 13:56:07 +0300
Subject: [PATCH 7/7] Cleanup

---
 .idea/misc.xml | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 .idea/misc.xml

diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index b30885f..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.10 virtualenv at ~/up/code_comments/code-to-comment/.code_to_comment (1)" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file