From 8fd90cfc1a2b0ae22aaf95dfed5d5dedb11c68ca Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Thu, 21 Jul 2022 10:37:16 +0300
Subject: [PATCH 1/9] Add a new endpoint that gets the similarity of phrases

---
 .idea/.gitignore    |  8 ++++
 .idea/modules.xml   |  8 ++++
 .idea/nlpserver.iml |  8 ++++
 .idea/vcs.xml       |  6 +++
 nlpserver.py        | 99 +++++++++++++++++++++++++++++++++------------
 5 files changed, 103 insertions(+), 26 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/nlpserver.iml
 create mode 100644 .idea/vcs.xml
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..9408255
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/nlpserver.iml" filepath="$PROJECT_DIR$/.idea/nlpserver.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/nlpserver.iml b/.idea/nlpserver.iml
new file mode 100644
index 0000000..c956989
--- /dev/null
+++ b/.idea/nlpserver.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="WEB_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/nlpserver.py b/nlpserver.py
index 06191bf..13b8484 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -10,7 +10,7 @@
 app = Flask(__name__)
 
 #  configurations
-#app.config['var1'] = 'test'
+# app.config['var1'] = 'test'
 
 default_data = {}
 default_data['web64'] = {
@@ -19,22 +19,24 @@
 		'last_modified': '2019-01-15',
 		'documentation': 'http://nlpserver.web64.com/',
 		'github': 'https://github.com/web64/nlp-server',
-		'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn'],
+		'endpoints': ['/status', '/gensim/summarize', '/gensim/similarity', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn'],
 	}
 
 default_data['message'] = 'NLP Server by web64.com'
 data = default_data
 
+
 @app.route("/")
 def main():
 	return render_template('form.html')
-	#return jsonify(data)
+	# return jsonify(data)
+
 
 @app.route('/status')
 def status():
 	data = dict(default_data)
 	data['missing_libraries'] = []
-	
+
 	try:
 		import textblob
 	except ImportError:
@@ -48,7 +50,10 @@ def status():
 		import gensim
 	except ImportError:
 		data['missing_libraries'].append('gensim')
-	
+	try:
+		import jieba
+	except ImportError:
+		data['missing_libraries'].append('jieba')
 	try:
 		import newspaper
 	except ImportError:
@@ -63,12 +68,12 @@ def status():
 		import readability
 	except ImportError:
 		data['missing_libraries'].append('readability')
-	
+
 	try:
 		import bs4
 	except ImportError:
 		data['missing_libraries'].append('bs4')
-	
+
 	try:
 		import afinn
 	except ImportError:
@@ -106,7 +111,7 @@ def spacy_entities():
 	if request.method == 'GET':
 		return jsonify(data)
 
-	params = request.form # postdata
+	params = request.form  # postdata
 
 	if not params:
 		data['error'] = 'Missing parameters'
@@ -121,20 +126,20 @@ def spacy_entities():
 	else:
 		lang = params['lang']
 
-	nlp = spacy.load( lang )
-	doc = nlp( params['text'] )
-	data['entities']  = {}
-	
-	counters  = {}
+	nlp = spacy.load(lang)
+	doc = nlp(params['text'])
+	data['entities'] = {}
+
+	counters = {}
 	for ent in doc.ents:
 		if not ent.label_ in data['entities']:
 			data['entities'][ent.label_] = dict()
 			counters[ent.label_] = 0
 		else:
 			counters[ent.label_] += 1
-	
-		data['entities'][ ent.label_ ][ counters[ent.label_] ] =  ent.text
-		#data['entities'][ent.label_].add( ent.text )
+
+		data['entities'][ent.label_][counters[ent.label_]] = ent.text
+		# data['entities'][ent.label_].add( ent.text )
 
 	return jsonify(data)
 
@@ -143,13 +148,11 @@ def spacy_entities():
 def gensim_summarize():
 	from gensim.summarization.summarizer import summarize
 	data = dict(default_data)
-	data['message'] = "Summarize long text - Usage: 'text' POST parameter"
+	data['message'] = "Summarize long text - Usage: 'text' POST parameter"+request.form['text']
+    
 	params = {}
 
-	if request.method == 'GET':
-		return jsonify(data)
-
-	params = request.form # postdata
+	params = request.form  # postdata
 
 	if not params:
 		data['error'] = 'Missing parameters'
@@ -163,17 +166,61 @@ def gensim_summarize():
 		word_count = None
 	else:
 		word_count = int(params['word_count'])
-	
-	data['summarize'] = summarize( text=params['text'], word_count=word_count )
+
+	data['summarize'] = summarize(text=params['text'], word_count=word_count)
 
 	return jsonify(data)
 
 
+@app.route("/gensim/similarity", methods=['GET', 'POST'])
+def gensim_similarity():
+
+    import jieba
+
+    from gensim import corpora, models, similarities
+    data = dict(default_data)
+    data['message'] = "get similarity percentage of phases"
+    
+    params = {}
+        
+    params = request.form  # postdata
+    
+    phases = [params['as'],'abcd efgh']
+
+    keyword = params['event']
+
+    texts = []
+
+    for phase in phases:
+        texts.append(list(jieba.cut(phase)))
+    
+    dictionary = corpora.Dictionary(texts)
+
+    feature_cnt = len(dictionary.token2id)
+
+    corpus = [dictionary.doc2bow(text) for text in texts]
+    
+    tfidf = models.TfidfModel(corpus)
+
+    kw_vector = dictionary.doc2bow( list(jieba.cut(keyword)) )
+
+    index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features = feature_cnt)
+
+    sim = index[tfidf[kw_vector]]
+
+    data['sim']= sim.tolist()
+    #for i in range(len(sim)):
+        #data['simlarity'+str((i+1))] = str(sim[i])
+
+    return jsonify(data)
+
+
 @app.route("/polyglot/neighbours", methods=['GET'])
 def embeddings():
 	from polyglot.text import Word
 	data = dict(default_data)
 	data['message'] = "Neighbours (Embeddings) - Find neighbors of word API - Parameters: 'word', 'lang' language (default: en)"
+
 	params = {}
 	
 	params['word']= request.args.get('word')
@@ -360,7 +407,7 @@ def readability():
 	
 	data['readability']['title'] = doc.title()
 	data['readability']['short_title'] = doc.short_title()
-	#data['readability']['content'] = doc.content()
+	# data['readability']['content'] = doc.content()
 	data['readability']['article_html'] = doc.summary( html_partial=True )
 
 	soup = BeautifulSoup( data['readability']['article_html'] ) 
@@ -378,7 +425,7 @@ def afinn_sentiment():
 	
 
 	data['afinn'] = 0
-	#data['afinn'] = afinn.score('This is utterly excellent!')
+	# data['afinn'] = afinn.score('This is utterly excellent!')
 
 	params = request.form # postdata
 
@@ -457,7 +504,7 @@ def newspaper():
 	data['newspaper']['source_url'] = article.source_url
 	data['newspaper']['meta_lang'] = article.meta_lang
 
-	#Detect language
+	# Detect language
 	if len(article.text)  > 100:
 		lang_data = langid.classify( article.title + ' ' + article.text ) 
 		data['langid']['language'] = lang_data[0]

From c66f8086b58a15b476984bf231ec5d6c87b46126 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Thu, 21 Jul 2022 10:40:40 +0300
Subject: [PATCH 2/9] update readme file

---
 README.md    | 1 +
 nlpserver.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d4d79a5..8945e74 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@ Endpoint|Method|Parameters|Info|Library
 /polyglot/neighbours|GET|word,lang|Embeddings: neighbouring words|polyglot
 /langid|GET,POST|text|Language detection for provided text|langid
 /gensim/summarize|POST|text,word_count|Summarization of long text|gensim
+/gensim/similarity|POST|text1,text2|Similarity percentage of texts|gensim, jieba
 /spacy/entities|POST|text,lang|Entity extraction for provided text in given language|SpaCy
 
 ## Usage
diff --git a/nlpserver.py b/nlpserver.py
index 13b8484..3834baa 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -185,9 +185,9 @@ def gensim_similarity():
         
     params = request.form  # postdata
     
-    phases = [params['as'],'abcd efgh']
+    phases = [params['text1'],'abcd efgh']
 
-    keyword = params['event']
+    keyword = params['text2']
 
     texts = []
 

From ff7728bc96749c1827bbf8c9e2aceb16d42c11d9 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Fri, 22 Jul 2022 10:45:47 +0300
Subject: [PATCH 3/9] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8945e74..1055317 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # NLP Server
-<p align="center">
+<!-- <p align="center">
   <img src="http://cdn.web64.com/nlp-norway/nlp-server-2.png" width="400">
-</p>
+</p> -->
 
 NLP Server is a Python 3 Flask web service for easy access to multilingual Natural Language Processing tasks such as language detection, article extraction, entity extraction, sentiment analysis, summarization and more.
 

From c5da9135d6efcafbd2f93beafac1f5986445fc1b Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 00:13:11 +0300
Subject: [PATCH 4/9] Update nlpserver.py

---
 nlpserver.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index 3834baa..a0413c9 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -313,9 +313,11 @@ def polyglot_sentiment():
 	else:
 		language = params['lang']
 
-
-	polyglot_text = Text(params['text'], hint_language_code=language)
-	data['sentiment'] = polyglot_text.polarity
+	try:
+		polyglot_text = Text(params['text'], hint_language_code=language)
+		data['sentiment'] = polyglot_text.polarity
+	except ZeroDivisionError: 
+		data['sentiment'] = 0
 	return jsonify(data)
 
 

From 7e95f6dc2621557f683644ea9ccdbf5efa1a9793 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 00:37:01 +0300
Subject: [PATCH 5/9] Update nlpserver.py

---
 nlpserver.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/nlpserver.py b/nlpserver.py
index a0413c9..21544d7 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -16,9 +16,8 @@
 default_data['web64'] = {
 		'app': 'nlpserver',
 		'version':	'1.0.1',
-		'last_modified': '2019-01-15',
-		'documentation': 'http://nlpserver.web64.com/',
-		'github': 'https://github.com/web64/nlp-server',
+		'last_modified': '2022-01-15',
+		'github': 'https://github.com/abdelrahmankhedr/nlpserver',
 		'endpoints': ['/status', '/gensim/summarize', '/gensim/similarity', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn'],
 	}
 

From 8c6bd3a5c48ef8b6276b69d921f736b91fa6a234 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 07:08:51 +0300
Subject: [PATCH 6/9] Update nlpserver.py

---
 nlpserver.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/nlpserver.py b/nlpserver.py
index 21544d7..aef64bd 100644
--- a/nlpserver.py
+++ b/nlpserver.py
@@ -320,6 +320,27 @@ def polyglot_sentiment():
 	return jsonify(data)
 
 
+@app.route("/trans/sentiment", methods=['GET','POST'])
+def trans_sentiment():
+	from transformers import pipeline
+	data = dict(default_data)
+	data['message'] = "Sentiment Analysis API - POST only"
+	data['sentiment'] = {}
+	
+	params = request.form # postdata
+	
+	if not params:
+		data['error'] = 'Missing parameters'
+		return jsonify(data)
+
+	if not params['text']:
+		data['error'] = 'Text parameter not found'
+		return jsonify(data)
+
+	classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
+	data['sentiment'] = classifier([params['text']])
+	return jsonify(data)
+
 @app.route("/polyglot/entities", methods=['GET','POST'])
 def polyglot_entities():
 	from polyglot.text import Text

From 93e35a30f62376405810303975bd46b9d2fba6c1 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 07:09:42 +0300
Subject: [PATCH 7/9] Update requirements.txt

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c375b42..d4ca16b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,5 +12,6 @@ readability-lxml
 BeautifulSoup4
 afinn
 textblob
+transformers
 #summa
-#pattern
\ No newline at end of file
+#pattern

From e40511c98636abef7dca41b6d1e16d3ef81deb3c Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 07:10:58 +0300
Subject: [PATCH 8/9] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 1055317..4d3e5f3 100644
--- a/README.md
+++ b/README.md
@@ -125,6 +125,7 @@ Endpoint|Method|Parameters|Info|Library
 /readability|POST|html|Article extraction for provided HTML|readability-lxml
 /polyglot/entities|POST|text,lang|Entity extraction and sentiment analysis for provided text|polyglot
 /polyglot/sentiment|POST|text,lang|Sentiment analysis for provided text|polyglot
+/trans/sentiment|POST|text|Sentiment analysis for provided text using a transformer model (English only)|transformers
 /polyglot/neighbours|GET|word,lang|Embeddings: neighbouring words|polyglot
 /langid|GET,POST|text|Language detection for provided text|langid
 /gensim/summarize|POST|text,word_count|Summarization of long text|gensim

From 5b1ebb5946ca513b6cb2403b1d04d1e9c74d51d4 Mon Sep 17 00:00:00 2001
From: abdelrahmankhedr <abdelrahmankhedr@gmail.com>
Date: Sun, 5 Feb 2023 07:11:39 +0300
Subject: [PATCH 9/9] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 4d3e5f3..ff2e12e 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ sudo pip3 install readability-lxml
 sudo pip3 install BeautifulSoup4
 sudo pip3 install afinn
 sudo pip3 install textblob
+sudo pip3 install transformers
 ```
 The /status api endpoint will list missing python modules: http://localhost:6400/status