Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Add Environment Variables

DEBUG=False
SECRET_KEY=5(15ds+i2+%ik6z&!yer+ga9m=e%jcqiz_5wszg)r-z!2--b2d
API_KEY=apikey
34 changes: 30 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,36 @@
Set of (Tensorflow) implementations which generate comments from code. Thesis for the B.sc. AI.

# How to execute:
## Seq2seq:
- Enter tf: source ~/tensorflow/bin/activate
- Execute code: python translate.py --size=256 --num_layers=3 --steps_per_checkpoint=50 --bleu
- Or interactive mode (only works when the model has been trained): python translate.py --size=350 --num_layers=3 --step_per_checkpoint=50 --decode
## Seq2seq:
- Create/activate virtualenv
```bash
source ~/tensorflow/bin/activate
```
- Install requirements
```bash
pip install --upgrade pip
pip install -r seq2seq/requirements.txt
```
- Run training. Note: the training process runs in an infinite loop:
```bash
python translate.py --num_layers=3
```
- Run evaluation (only works when the model has been trained):
```bash
python translate.py --num_layers=3 --evaluate
```

- Run interactive translation mode (only works when the model has been trained):
```bash
python translate.py --num_layers=3 --decode
```

- Run flask app with web server in Docker
```bash
docker-compose up -d
```

The server listens on localhost port 80 by default. The welcome page is available for testing.

### Options
- add --evaluate to see the score with a trained model on the development file (default False)
Expand Down
12 changes: 6 additions & 6 deletions dataset_generation/getDocStrings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from os.path import basename, splitext
import sys
import util
import util


commentList = ["# ", "#!"]
Expand Down Expand Up @@ -83,7 +83,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
# loop through all the lines in the source, get the comment
# and the corresponding code
with open(commentFile, "a") as commentF:
with open(codeFile, "a") as codeF:
with open(codeFile, "a") as codeF:
for i in xrange(startLine, len(source)):
# print "i in comment loop is:" , i
globalI = i
Expand Down Expand Up @@ -117,10 +117,10 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):

# first if we are at another indentation level, we found an deeper
# docstring, thus exit
if currIndent != indentation or not inComment:
if currIndent != indentation or not inComment:
# print ">>>It is a new comment, return error"
return(i,False)

# otherwise end the comment
else:
# print ">>>Closed comment"
Expand Down Expand Up @@ -153,7 +153,7 @@ def filterDocString(source, startLine, codeFile, commentFile, maxBucket):
commentF.write(util.cleanComment(comment) + "\n!@#$%!@#$%!@#$%!@#$%!@#$%")

return(i, True)

# if we are still here, add the current line to the code
code.append(line.strip())

Expand Down Expand Up @@ -200,6 +200,6 @@ def isDef(source, startLine, i):

if __name__ == '__main__':
import sys

with open(sys.argv[1]) as fp:
make_pairs(fp)
21 changes: 21 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Application container: built from ./seq2seq and served by gunicorn.
seq2seq:
restart: always
build: ./seq2seq
# Port 8000 is only exposed to linked containers; it is not published on the host.
expose:
- "8000"
volumes:
- /usr/src/app/static
# Environment variables are loaded from the .env file.
env_file: .env
# Two gunicorn workers bound to port 8000, serving the `app` object of module `app`.
command: /usr/local/bin/gunicorn -w 2 -b :8000 app:app

# Front-end web server: built from ./nginx/ and published on host port 80.
nginx:
restart: always
build: ./nginx/
ports:
- "80:80"
volumes:
- /www/static
# Mount the volumes declared by the seq2seq container as well.
volumes_from:
- seq2seq
# Makes the application container reachable under the hostname `seq2seq`.
links:
- seq2seq:seq2seq
3 changes: 3 additions & 0 deletions nginx/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM tutum/nginx
RUN rm /etc/nginx/sites-enabled/default
ADD sites-enabled/ /etc/nginx/sites-enabled
17 changes: 17 additions & 0 deletions nginx/sites-enabled/flask_project
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Virtual host that serves static files directly and proxies everything else
# to the application listening on seq2seq:8000.
server {

listen 80;
server_name example.org;
charset utf-8;

# Static assets are read from disk instead of going through the application.
location /static {
alias /usr/src/app/static;
}

# All other requests are forwarded upstream, passing along the original
# Host header and the client address.
location / {
proxy_pass http://seq2seq:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
2 changes: 1 addition & 1 deletion ptr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def step(self):
with tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) as sess:
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter("/tmp/pointer_logs", sess.graph)
init = tf.initialize_all_variables()
init = tf.global_variables_initializer()
sess.run(init)
for i in range(10000):
encoder_input_data, decoder_input_data, targets_data = dataset.next_batch(
Expand Down
1 change: 1 addition & 0 deletions seq2seq/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FROM python:2.7-onbuild
99 changes: 99 additions & 0 deletions seq2seq/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# app.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from flask import Flask
from flask import Response
from flask import abort
from flask import make_response
from flask import request, render_template
from flask.ext.sqlalchemy import SQLAlchemy
from config import BaseConfig

from translate import *

# Flask application, configured from the BaseConfig class imported from config.
app = Flask(__name__)
app.config.from_object(BaseConfig)
# SQLAlchemy handle bound to the application.
db = SQLAlchemy(app)

# Module-level translation state. Everything starts as None; trans() returns
# None while `model` or `sess` is unset. The `__main__` block at the bottom of
# this file is the only place in this module that assigns real values.
sess = None
model = None
code_vocab_path = None
en_vocab_path = None
code_vocab = None
rev_en_vocab = None
# Bucket sizes. trans() compares the first element of each pair with the
# number of input tokens; the second element is presumably the matching
# output length -- confirm against translate.py.
_buckets = [(5, 10), (10, 15), (20, 25), (40, 50), (250,100)]

# Override the num_layers flag before the flags object is used to build the model.
tf.app.flags.FLAGS.num_layers = 3
FLAGS = tf.app.flags.FLAGS


def trans(sentence):
    """Translate `sentence` with the module-level seq2seq model.

    Args:
        sentence: Text to translate.

    Returns:
        The decoded output tokens joined by single spaces, or None when the
        model or session has not been initialised, when `sentence` is empty,
        or when the tokenised input does not fit into any bucket.
    """
    if not model or not sess or not sentence:
        return None

    # Get token-ids for the input sentence.
    token_ids = data_utils.sentence_to_token_ids(
        tf.compat.as_bytes(sentence), code_vocab)

    # Which bucket does it belong to? Take the first (lowest-index) bucket
    # whose input size is strictly larger than the number of tokens. Inputs
    # too long for every bucket used to crash with ValueError from min() on
    # an empty list; report them as untranslatable instead.
    bucket_id = next(
        (idx for idx, bucket in enumerate(_buckets)
         if bucket[0] > len(token_ids)),
        None)
    if bucket_id is None:
        return None

    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        {bucket_id: [(token_ids, [])]}, bucket_id)

    # Get output logits for the sentence.
    _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                     target_weights, bucket_id, True)

    # This is a greedy decoder - outputs are just argmaxes of output_logits.
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]

    # If there is an EOS symbol in outputs, cut them at that point.
    if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]

    # Map the remaining ids back to tokens, skipping ids not found in the
    # reverse vocabulary.
    # NOTE(review): if rev_en_vocab is a list of tokens rather than a dict
    # keyed by id, this membership test compares ids against tokens and would
    # drop every output -- confirm the type returned by
    # data_utils.initialize_vocabulary.
    return " ".join([tf.compat.as_str(rev_en_vocab[output])
                     for output in outputs if output in rev_en_vocab])


@app.route('/', methods=['GET'])
def index():
    """Serve the welcome page rendered from the `index.html` template."""
    page = render_template('index.html')
    return page


@app.route('/api', methods=['POST'])
def api():
    """Translate the `q` request parameter and return the result as the body.

    The request must carry a `key` parameter equal to the configured APIKEY.

    Returns:
        A 401 response when `key` is missing or wrong, a 500 response when
        `q` is missing, otherwise a response whose body is the translation
        (or the placeholder text "translation result" when trans() yields
        nothing).
    """
    # The two conditions must be joined with `or`. With `and`, any request
    # that supplied a key -- even a wrong one -- skipped this check, and a
    # request without a key failed on the lookup instead of returning 401.
    if "key" not in request.values or request.values["key"] != app.config["APIKEY"]:
        return Response("Not authorized, please pass 'key' url parameter with APIKEY", 401)

    if "q" not in request.values:
        return Response("No q parameter", 500)

    res = trans(request.values["q"])

    # trans() returns None (or an empty string) when nothing could be
    # translated; fall back to the placeholder body in that case.
    if not res:
        res = "translation result"

    return make_response(res)


# Script entry point: set up the TensorFlow state used by trans() and start
# Flask's built-in server.
# NOTE(review): docker-compose starts this module through gunicorn (`app:app`),
# which imports it rather than running it as a script, so this block does not
# execute there and `model`/`sess` keep their initial None values.
if __name__ == '__main__':
# The model is only loaded when the DEBUG setting is falsy.
if not app.config["DEBUG"]:
sess = tf.Session()
model = create_model(sess, True, FLAGS)
model.batch_size = 1 # We decode one sentence at a time.
#
# Load vocabularies.
# NOTE(review): `data_dir` is not defined in this file; presumably it comes
# from `from translate import *` -- confirm.
code_vocab_path = os.path.join(data_dir,
"vocab%d.code" % FLAGS.code_vocab_size)
en_vocab_path = os.path.join(data_dir,
"vocab%d.en" % FLAGS.en_vocab_size)
# Keep the first return value for the code vocabulary and the second for the
# English one; trans() uses them for tokenising input and decoding output.
code_vocab, _ = data_utils.initialize_vocabulary(code_vocab_path)
_, rev_en_vocab = data_utils.initialize_vocabulary(en_vocab_path)
print("Tensorflow session ready")
app.run()
8 changes: 8 additions & 0 deletions seq2seq/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import requests


def trans(code, url, apikey):
    """POST `code` to the translation endpoint at `url` and return its reply.

    The API key is sent as the `key` query parameter and the code as the `q`
    form field. Returns the raw response content when the response is OK,
    otherwise an empty string.
    """
    query = {"key": apikey}
    form = {"q": code}
    response = requests.post(url, params=query, data=form)
    return response.content if response.ok else ""
13 changes: 13 additions & 0 deletions seq2seq/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# config.py


import os


def _env_flag(name, default):
    """Read environment variable `name` as a boolean.

    Returns `default` when the variable is unset; otherwise True only for the
    case-insensitive values "1", "true", "yes" or "on".
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")


class BaseConfig(object):
    """Flask settings read from environment variables, with built-in defaults."""

    # NOTE(review): this fallback secret is committed to the repository; set
    # SECRET_KEY in the environment for any real deployment.
    SECRET_KEY = os.environ.get('SECRET_KEY', '5(15ds+i2+%ik6z&!yer+ga9m=e%jcqiz_5wszg)r-z!2--b2d')
    # Environment values are always strings, so "False" would be truthy if
    # used directly; parse it into a real boolean instead.
    DEBUG = _env_flag('DEBUG', True)
    # The shipped .env file spells this variable API_KEY, so accept both
    # names; APIKEY wins when both are set.
    APIKEY = os.environ.get('APIKEY', os.environ.get('API_KEY', "apikey"))

6 changes: 6 additions & 0 deletions seq2seq/create_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# create_db.py


from app import db

db.create_all()
9 changes: 6 additions & 3 deletions seq2seq/evaluation/bleu/multi-bleu.perl
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,12 @@ sub add_to_ref {
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}

if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
if($length_translation==0){
$brevity_penalty = exp(0);
}else{
if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
Expand Down
17 changes: 17 additions & 0 deletions seq2seq/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Flask==0.10.1
Flask-SQLAlchemy==2.0
Jinja2==2.7.3
MarkupSafe==0.23
SQLAlchemy==0.9.9
Werkzeug==0.10.4
gunicorn==19.3.0
itsdangerous==0.24
psycopg2==2.6
requests
funcsigs==1.0.2
mock==2.0.0
numpy==1.11.2
pbr==1.10.0
protobuf==3.1.0
six==1.10.0
tensorflow==0.12.0rc1
2 changes: 1 addition & 1 deletion seq2seq/seq2seq_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
self.updates.append(opt.apply_gradients(
zip(clipped_gradients, params), global_step=self.global_step))

self.saver = tf.train.Saver(tf.all_variables())
self.saver = tf.train.Saver(tf.global_variables())

def step(self, session, encoder_inputs, decoder_inputs, target_weights,
bucket_id, forward_only):
Expand Down
5 changes: 5 additions & 0 deletions seq2seq/static/css/bootstrap.min.css

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions seq2seq/static/css/main.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* custom styles */

.container {
max-width: 500px;
}
1 change: 1 addition & 0 deletions seq2seq/static/img/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

7 changes: 7 additions & 0 deletions seq2seq/static/js/bootstrap.min.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions seq2seq/static/js/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// custom scripts
27 changes: 27 additions & 0 deletions seq2seq/templates/_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Code to comment</title>
<!-- meta -->
<meta name="description" content="">
<meta name="author" content="">
<meta name="viewport" content="width=device-width,initial-scale=1">
<!-- styles -->
<link href="static/css/bootstrap.min.css" rel="stylesheet">
<link href="static/css/main.css" rel="stylesheet">
{% block css %}{% endblock %}
</head>
<body>
<div class="container">
<br>
<!-- child template -->
{% block content %}{% endblock %}
</div>
<!-- scripts -->
<script src="//code.jquery.com/jquery-1.11.2.min.js" type="text/javascript"></script>
<script src="static/js/bootstrap.min.js" type="text/javascript"></script>
<script src="static/js/main.js" type="text/javascript"></script>
{% block js %}{% endblock %}
</body>
</html>
11 changes: 11 additions & 0 deletions seq2seq/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{% extends "_base.html" %}

{% block content %}

<div class="text-center">
<h1>Welcome!</h1>
</div>



{% endblock %}
Loading