thaije · vaskinyy · Dec 15, 2016 · Dec 16, 2016 · Dec 24, 2016 · Dec 24, 2016
diff --git a/.env b/.env
@@ -0,0 +1,5 @@
+# Add Environment Variables
+
+DEBUG=False
+SECRET_KEY=5(15ds+i2+%ik6z&!yer+ga9m=e%jcqiz_5wszg)r-z!2--b2d
+API_KEY=apikey
diff --git a/README.md b/README.md
@@ -2,10 +2,36 @@
 Set of (Tensorflow) implementations which generate comments from code. Thesis for the B.sc. AI. 
 
 # How to execute:
-## Seq2seq: 
-- Enter tf: source ~/tensorflow/bin/activate
-- Execute code: python translate.py --size=256 --num_layers=3 --steps_per_checkpoint=50 --bleu
-- Or interactive mode (only works when the model has been trained): python translate.py --size=350 --num_layers=3 --step_per_checkpoint=50 --decode
+## Seq2seq:
+- Create/activate virtualenv
+```bash
+source ~/tensorflow/bin/activate
+```
+- Install requirements
+```bash
+pip install --upgrade pip
+pip install -r seq2seq/requirements.txt
+```
+- Run training. Note that the training process runs in an infinite loop:
+```bash
+python translate.py --num_layers=3
+```
+- Run evaluation (only works when the model has been trained): 
+```bash
+python translate.py --num_layers=3 --evaluate
+```
+
+- Run interactive translation mode (only works when the model has been trained): 
+```bash
+python translate.py --num_layers=3 --decode
+```
+
+- Run flask app with web server in Docker
+```bash
+docker-compose up -d
+```
+
+The server is up at localhost port 80 by default. The welcome page is available for tests
 
 ### Options
 - add --evaluate to see the score with a trained model on the development file (default False)

diff --git a/dataset_generation/getDocStrings.py b/dataset_generation/getDocStrings.py
@@ -1,6 +1,6 @@
 from os.path import basename, splitext
 import sys
-import util 
+import util
 
 
 commentList = ["# ", "#!"]
@@ -83,7 +83,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
     # loop through all the lines in the source, get the comment 
     # and the corresponding code
     with open(commentFile, "a") as commentF:
-        with open(codeFile, "a") as codeF:      
+        with open(codeFile, "a") as codeF:
             for i in xrange(startLine, len(source)):
                 # print "i in comment loop is:" , i
                 globalI = i
@@ -117,10 +117,10 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
 
                     # first if we are at another indentation level, we found an deeper
                     # docstring, thus exit
-                    if currIndent != indentation or not inComment: 
+                    if currIndent != indentation or not inComment:
                         # print ">>>It is a new comment, return error"
                         return(i,False)
-                    
+
                     # otherwise end the comment
                     else:
                         # print ">>>Closed comment"
@@ -153,7 +153,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
                     commentF.write(util.cleanComment(comment) + "\n!@#$%!@#$%!@#$%!@#$%!@#$%")
 
                     return(i, True)
-                
+
                 # if we are still here, add the current line to the code
                 code.append(line.strip())
 
@@ -200,6 +200,6 @@ def isDef(source, startLine, i):
 
 if __name__ == '__main__':
     import sys
-    
+
     with open(sys.argv[1]) as fp:
         make_pairs(fp)
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,21 @@
+seq2seq:
+  restart: always
+  build: ./seq2seq
+  expose:
+    - "8000"
+  volumes:
+    - /usr/src/app/static
+  env_file: .env
+  command: /usr/local/bin/gunicorn -w 2 -b :8000 app:app
+
+nginx:
+  restart: always
+  build: ./nginx/
+  ports:
+    - "80:80"
+  volumes:
+    - /www/static
+  volumes_from:
+    - seq2seq
+  links:
+    - seq2seq:seq2seq
diff --git a/nginx/Dockerfile b/nginx/Dockerfile
@@ -0,0 +1,3 @@
+FROM tutum/nginx
+RUN rm /etc/nginx/sites-enabled/default
+ADD sites-enabled/ /etc/nginx/sites-enabled
diff --git a/nginx/sites-enabled/flask_project b/nginx/sites-enabled/flask_project
@@ -0,0 +1,17 @@
+server {
+
+    listen 80;
+    server_name example.org;
+    charset utf-8;
+
+    location /static {
+        alias /usr/src/app/static;
+    }
+
+    location / {
+        proxy_pass http://seq2seq:8000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+}
diff --git a/ptr/main.py b/ptr/main.py
@@ -133,7 +133,7 @@ def step(self):
         with tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) as sess:
             merged = tf.merge_all_summaries()
             writer = tf.train.SummaryWriter("/tmp/pointer_logs", sess.graph)
-            init = tf.initialize_all_variables()
+            init = tf.global_variables_initializer()
             sess.run(init)
             for i in range(10000):
                 encoder_input_data, decoder_input_data, targets_data = dataset.next_batch(

diff --git a/seq2seq/Dockerfile b/seq2seq/Dockerfile
@@ -0,0 +1 @@
+FROM python:2.7-onbuild
diff --git a/seq2seq/app.py b/seq2seq/app.py
@@ -0,0 +1,99 @@
+# app.py
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from flask import Flask
+from flask import Response
+from flask import abort
+from flask import make_response
+from flask import request, render_template
+from flask.ext.sqlalchemy import SQLAlchemy
+from config import BaseConfig
+
+from translate import *
+
+# Flask application object, configured from BaseConfig (imported from config).
+app = Flask(__name__)
+app.config.from_object(BaseConfig)
+# NOTE(review): this module declares no models and BaseConfig as shown sets no
+# SQLALCHEMY_DATABASE_URI -- confirm the SQLAlchemy handle is actually needed.
+db = SQLAlchemy(app)
+
+# Translation state shared with trans(). Everything starts as None and is only
+# filled in by the __main__ block at the bottom of this file; trans() returns
+# None while model or sess is unset.
+sess = None
+model = None
+code_vocab_path = None
+en_vocab_path = None
+code_vocab = None
+rev_en_vocab = None
+# Bucket table; trans() compares the first element of each pair against the
+# number of input tokens to pick a bucket.
+_buckets = [(5, 10), (10, 15), (20, 25), (40, 50), (250,100)]
+
+# Force the num_layers flag to 3 before FLAGS is handed to create_model().
+tf.app.flags.FLAGS.num_layers = 3
+FLAGS = tf.app.flags.FLAGS
+
+
+def trans(sentence):
+    """Translate ``sentence`` with the module-level seq2seq model.
+
+    Args:
+        sentence: source text to translate.
+
+    Returns:
+        The decoded output tokens joined by single spaces, or None when the
+        model/session has not been set up or ``sentence`` is empty.
+    """
+    if not model or not sess or not sentence:
+        return None
+    # Get token-ids for the input sentence.
+    token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), code_vocab)
+
+    # A bucket only fits inputs strictly shorter than its first element, so an
+    # input at or beyond the largest bucket matched nothing and min() raised
+    # ValueError on an empty sequence. Truncate such inputs to the longest
+    # length the last bucket accepts instead of crashing the request.
+    max_tokens = _buckets[-1][0] - 1
+    if len(token_ids) > max_tokens:
+        token_ids = token_ids[:max_tokens]
+
+    # Lowest-indexed bucket whose size exceeds the number of input tokens.
+    bucket_id = min(b for b in xrange(len(_buckets))
+                    if _buckets[b][0] > len(token_ids))
+    # Get a 1-element batch to feed the sentence to the model.
+    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
+        {bucket_id: [(token_ids, [])]}, bucket_id)
+
+    # Get output logits for the sentence.
+    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
+                                     target_weights, bucket_id, True)
+    # This is a greedy decoder - outputs are just argmaxes of output_logits.
+    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
+
+    # If there is an EOS symbol in outputs, cut them at that point.
+    if data_utils.EOS_ID in outputs:
+        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
+    # Map the output ids back to target-vocabulary tokens.
+    # NOTE(review): `output in rev_en_vocab` only tests for a valid id if
+    # rev_en_vocab is a dict keyed by id. If initialize_vocabulary returns a
+    # list of tokens, this filter drops every id and the result is always
+    # empty -- confirm against data_utils.
+    return " ".join([tf.compat.as_str(rev_en_vocab[output]) for output in outputs if output in rev_en_vocab])
+
+
+@app.route('/', methods=['GET'])
+def index():
+    """Serve the welcome page rendered from the index.html template."""
+    page = render_template('index.html')
+    return page
+
+
+@app.route('/api', methods=['POST'])
+def api():
+    """Translate the ``q`` request parameter and return the result as the body.
+
+    Request parameters (read from ``request.values``):
+        key: must equal the configured ``APIKEY``; otherwise a 401 is returned.
+        q: text to translate; a 400 is returned when it is absent.
+    """
+    # Reject the request when the key is missing OR wrong. The previous check
+    # joined the two conditions with `and`, so the comparison was never reached
+    # for requests that supplied a key (any value was accepted), and a missing
+    # key fell through to a failing lookup instead of this 401.
+    if request.values.get("key") != app.config["APIKEY"]:
+        return Response("Not authorized, please pass 'key' url parameter with APIKEY", 401)
+
+    if "q" not in request.values:
+        # A missing parameter is a client error, not a server fault.
+        return Response("No q parameter", 400)
+
+    res = trans(request.values["q"])
+
+    if not res:
+        # Placeholder body used whenever trans() yields nothing (for example
+        # when the model has not been loaded).
+        res = "translation result"
+
+    return make_response(res)
+
+
+if __name__ == '__main__':
+    # NOTE(review): this set-up only runs when the file is executed directly.
+    # docker-compose.yml starts the service with `gunicorn ... app:app`, which
+    # imports the module instead, so model/sess stay None there and /api falls
+    # back to its placeholder text -- confirm whether that is intended.
+    if not app.config["DEBUG"]:
+        # Outside debug mode, build the session, model and vocabularies that
+        # trans() reads from the module-level globals.
+        sess = tf.Session()
+        model = create_model(sess, True, FLAGS)
+        model.batch_size = 1  # We decode one sentence at a time.
+        #
+        # Load vocabularies.
+        # NOTE(review): data_dir is not defined in this file; presumably it is
+        # provided by `from translate import *` -- verify.
+        code_vocab_path = os.path.join(data_dir,
+                                       "vocab%d.code" % FLAGS.code_vocab_size)
+        en_vocab_path = os.path.join(data_dir,
+                                     "vocab%d.en" % FLAGS.en_vocab_size)
+        code_vocab, _ = data_utils.initialize_vocabulary(code_vocab_path)
+        _, rev_en_vocab = data_utils.initialize_vocabulary(en_vocab_path)
+        print("Tensorflow session ready")
+    app.run()
diff --git a/seq2seq/client.py b/seq2seq/client.py
@@ -0,0 +1,8 @@
+import requests
+
+
+def trans(code, url, apikey):
+    """POST ``code`` to the translation endpoint at ``url``.
+
+    The API key is sent as the ``key`` query parameter and the code as the
+    ``q`` form field. Returns the raw response body, or "" when the reply is
+    not OK.
+    """
+    response = requests.post(url, params={"key": apikey}, data={"q": code})
+    return response.content if response.ok else ""
diff --git a/seq2seq/config.py b/seq2seq/config.py
@@ -0,0 +1,13 @@
+# config.py
+
+
+import os
+
+
+class BaseConfig(object):
+    """Flask settings read from environment variables, with fallbacks.
+
+    SECRET_KEY: taken from ``SECRET_KEY``.
+    DEBUG: boolean parsed from ``DEBUG``; defaults to True when unset.
+    APIKEY: taken from ``APIKEY``, falling back to ``API_KEY``.
+    """
+    # NOTE(review): the fallback secret is committed to the repository; it
+    # should only ever be relied on for local development.
+    SECRET_KEY = os.environ.get('SECRET_KEY', '5(15ds+i2+%ik6z&!yer+ga9m=e%jcqiz_5wszg)r-z!2--b2d')
+    # Environment values are strings, so a raw "False" would be truthy; parse
+    # the text into a real boolean instead.
+    DEBUG = str(os.environ.get('DEBUG', True)).strip().lower() in ('1', 'true', 'yes', 'on')
+    # The repository's .env file names this variable API_KEY, so accept that
+    # spelling as well as APIKEY.
+    APIKEY = os.environ.get('APIKEY', os.environ.get('API_KEY', "apikey"))
+
diff --git a/seq2seq/create_db.py b/seq2seq/create_db.py
@@ -0,0 +1,6 @@
+# create_db.py
+
+
+# Calls create_all() on the SQLAlchemy handle exported by app.py.
+# NOTE(review): app.py as shown declares no models and BaseConfig sets no
+# SQLALCHEMY_DATABASE_URI -- confirm which database this is meant to target.
+from app import db
+
+db.create_all()
diff --git a/seq2seq/evaluation/bleu/multi-bleu.perl b/seq2seq/evaluation/bleu/multi-bleu.perl
@@ -149,9 +149,12 @@ sub add_to_ref {
   printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
   exit(1);
 }
-
-if ($length_translation<$length_reference) {
-  $brevity_penalty = exp(1-$length_reference/$length_translation);
+if($length_translation==0){
+  $brevity_penalty = exp(0);
+}else{
+ if ($length_translation<$length_reference) {
+   $brevity_penalty = exp(1-$length_reference/$length_translation);
+ }
 }
 $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
 				my_log( $bleu[2] ) +

diff --git a/seq2seq/requirements.txt b/seq2seq/requirements.txt
@@ -0,0 +1,17 @@
+Flask==0.10.1
+Flask-SQLAlchemy==2.0
+Jinja2==2.7.3
+MarkupSafe==0.23
+SQLAlchemy==0.9.9
+Werkzeug==0.10.4
+gunicorn==19.3.0
+itsdangerous==0.24
+psycopg2==2.6
+requests
+funcsigs==1.0.2
+mock==2.0.0
+numpy==1.11.2
+pbr==1.10.0
+protobuf==3.1.0
+six==1.10.0
+tensorflow==0.12.0rc1
diff --git a/seq2seq/seq2seq_model.py b/seq2seq/seq2seq_model.py
@@ -175,7 +175,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
         self.updates.append(opt.apply_gradients(
             zip(clipped_gradients, params), global_step=self.global_step))
 
-    self.saver = tf.train.Saver(tf.all_variables())
+    self.saver = tf.train.Saver(tf.global_variables())
 
   def step(self, session, encoder_inputs, decoder_inputs, target_weights,
            bucket_id, forward_only):

diff --git a/seq2seq/static/css/bootstrap.min.css b/seq2seq/static/css/bootstrap.min.css
diff --git a/seq2seq/static/css/main.css b/seq2seq/static/css/main.css
@@ -0,0 +1,5 @@
+/* custom styles */
+
+.container {
+  max-width: 500px;
+}
diff --git a/seq2seq/static/img/.gitkeep b/seq2seq/static/img/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/seq2seq/static/js/bootstrap.min.js b/seq2seq/static/js/bootstrap.min.js
diff --git a/seq2seq/static/js/main.js b/seq2seq/static/js/main.js
@@ -0,0 +1 @@
+// custom scripts
diff --git a/seq2seq/templates/_base.html b/seq2seq/templates/_base.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8">
+    <title>Code to comment</title>
+    <!-- meta -->
+    <meta name="description" content="">
+    <meta name="author" content="">
+    <meta name="viewport" content="width=device-width,initial-scale=1">
+    <!-- styles -->
+    <link href="static/css/bootstrap.min.css" rel="stylesheet">
+    <link href="static/css/main.css" rel="stylesheet">
+    {% block css %}{% endblock %}
+  </head>
+  <body>
+    <div class="container">
+      <br>
+      <!-- child template -->
+      {% block content %}{% endblock %}
+    </div>
+    <!-- scripts -->
+    <script src="//code.jquery.com/jquery-1.11.2.min.js" type="text/javascript"></script>
+    <script src="static/js/bootstrap.min.js" type="text/javascript"></script>
+    <script src="static/js/main.js" type="text/javascript"></script>
+    {% block js %}{% endblock %}
+  </body>
+</html>
diff --git a/seq2seq/templates/index.html b/seq2seq/templates/index.html
@@ -0,0 +1,11 @@
+{% extends "_base.html" %}
+
+{% block content %}
+
+<div class="text-center">
+  <h1>Welcome!</h1>
+</div>
+
+
+
+{% endblock %}