Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/nlpserver.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NLP Server
<p align="center">
<!-- <p align="center">
<img src="http://cdn.web64.com/nlp-norway/nlp-server-2.png" width="400">
</p>
</p> -->

NLP Server is a Python 3 Flask web service for easy access to multilingual Natural Language Processing tasks such as language detection, article extraction, entity extraction, sentiment analysis, summarization and more.

Expand Down Expand Up @@ -73,6 +73,7 @@ sudo pip3 install readability-lxml
sudo pip3 install BeautifulSoup4
sudo pip3 install afinn
sudo pip3 install textblob
sudo pip3 install transformers
```
The /status api endpoint will list missing python modules: http://localhost:6400/status

Expand Down Expand Up @@ -125,9 +126,11 @@ Endpoint|Method|Parameters|Info|Library
/readability|POST|html|Article extraction for provided HTML|readability-lxml
/polyglot/entities|POST|text,lang|Entity extraction and sentiment analysis for provided text|polyglot
/polyglot/sentiment|POST|text,lang|Sentiment analysis for provided text|polyglot
/trans/sentiment|POST|text|Sentiment analysis for provided text using a transformer model (English only)|transformers
/polyglot/neighbours|GET|word,lang|Embeddings: neighbouring words|polyglot
/langid|GET,POST|text|Language detection for provided text|langid
/gensim/summarize|POST|text,word_count|Summarization of long text|gensim
/gensim/similarity|POST|text1,text2|Similarity percentage of texts|gensim
/spacy/entities|POST|text,lang|Entity extraction for provided text in given language|SpaCy

## Usage
Expand Down
133 changes: 101 additions & 32 deletions nlpserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,32 @@
app = Flask(__name__)

# configurations
#app.config['var1'] = 'test'
# app.config['var1'] = 'test'

default_data = {}
default_data['web64'] = {
'app': 'nlpserver',
'version': '1.0.1',
'last_modified': '2019-01-15',
'documentation': 'http://nlpserver.web64.com/',
'github': 'https://github.com/web64/nlp-server',
'endpoints': ['/status','/gensim/summarize', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn'],
'last_modified': '2022-01-15',
'github': 'https://github.com/abdelrahmankhedr/nlpserver',
'endpoints': ['/status', '/gensim/summarize', '/gensim/similarity', '/polyglot/neighbours', '/langid', '/polyglot/entities', '/polyglot/sentiment', '/newspaper', '/readability', '/spacy/entities', '/afinn'],
}

default_data['message'] = 'NLP Server by web64.com'
data = default_data


@app.route("/")
def main():
return render_template('form.html')
#return jsonify(data)
# return jsonify(data)


@app.route('/status')
def status():
data = dict(default_data)
data['missing_libraries'] = []

try:
import textblob
except ImportError:
Expand All @@ -48,7 +49,10 @@ def status():
import gensim
except ImportError:
data['missing_libraries'].append('gensim')

try:
import jieba
except ImportError:
data['missing_libraries'].append('jieba')
try:
import newspaper
except ImportError:
Expand All @@ -63,12 +67,12 @@ def status():
import readability
except ImportError:
data['missing_libraries'].append('readability')

try:
import bs4
except ImportError:
data['missing_libraries'].append('bs4')

try:
import afinn
except ImportError:
Expand Down Expand Up @@ -106,7 +110,7 @@ def spacy_entities():
if request.method == 'GET':
return jsonify(data)

params = request.form # postdata
params = request.form # postdata

if not params:
data['error'] = 'Missing parameters'
Expand All @@ -121,20 +125,20 @@ def spacy_entities():
else:
lang = params['lang']

nlp = spacy.load( lang )
doc = nlp( params['text'] )
data['entities'] = {}
counters = {}
nlp = spacy.load(lang)
doc = nlp(params['text'])
data['entities'] = {}

counters = {}
for ent in doc.ents:
if not ent.label_ in data['entities']:
data['entities'][ent.label_] = dict()
counters[ent.label_] = 0
else:
counters[ent.label_] += 1
data['entities'][ ent.label_ ][ counters[ent.label_] ] = ent.text
#data['entities'][ent.label_].add( ent.text )

data['entities'][ent.label_][counters[ent.label_]] = ent.text
# data['entities'][ent.label_].add( ent.text )

return jsonify(data)

Expand All @@ -143,13 +147,11 @@ def spacy_entities():
def gensim_summarize():
from gensim.summarization.summarizer import summarize
data = dict(default_data)
data['message'] = "Summarize long text - Usage: 'text' POST parameter"
data['message'] = "Summarize long text - Usage: 'text' POST parameter"+request.form['text']

params = {}

if request.method == 'GET':
return jsonify(data)

params = request.form # postdata
params = request.form # postdata

if not params:
data['error'] = 'Missing parameters'
Expand All @@ -163,17 +165,61 @@ def gensim_summarize():
word_count = None
else:
word_count = int(params['word_count'])
data['summarize'] = summarize( text=params['text'], word_count=word_count )

data['summarize'] = summarize(text=params['text'], word_count=word_count)

return jsonify(data)


@app.route("/gensim/similarity", methods=['GET', 'POST'])
def gensim_similarity():
    """Score how similar the 'text2' form field is to the 'text1' form field.

    Form parameters (POST):
        text1: reference text; it is indexed together with a fixed filler
            phrase.
        text2: query text that is compared against that index.

    Returns:
        A JSON response built from ``default_data``. On success it carries
        ``sim``, a two-element list: the TF-IDF similarity of ``text2`` to
        ``text1`` followed by its similarity to the filler phrase. A GET
        request returns only the usage message; a POST without both
        parameters additionally carries an ``error`` key.
    """
    import jieba
    from gensim import corpora, models, similarities

    data = dict(default_data)
    data['message'] = "get similarity percentage of phases"

    # Same convention as the other endpoints: GET only describes the API.
    if request.method == 'GET':
        return jsonify(data)

    params = request.form  # postdata

    if not params:
        data['error'] = 'Missing parameters'
        return jsonify(data)

    # .get() instead of [] so a missing field yields a JSON error payload
    # rather than aborting the request.
    reference_text = params.get('text1')
    query_text = params.get('text2')
    if not reference_text or not query_text:
        data['error'] = "Parameters 'text1' and 'text2' are required"
        return jsonify(data)

    # NOTE(review): the second, fixed document is presumably there so the
    # corpus has more than one document and the TF-IDF weights of 'text1'
    # do not all collapse to zero -- confirm before removing it. It is kept
    # because it determines both the scores and the length of 'sim'.
    filler_text = 'abcd efgh'
    documents = [reference_text, filler_text]

    tokenized_docs = [list(jieba.cut(document)) for document in documents]

    dictionary = corpora.Dictionary(tokenized_docs)
    feature_cnt = len(dictionary.token2id)
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_docs]

    tfidf = models.TfidfModel(corpus)
    index = similarities.SparseMatrixSimilarity(
        tfidf[corpus], num_features=feature_cnt)

    query_vector = dictionary.doc2bow(list(jieba.cut(query_text)))

    # One score per indexed document, in the order of `documents`.
    data['sim'] = index[tfidf[query_vector]].tolist()

    return jsonify(data)


@app.route("/polyglot/neighbours", methods=['GET'])
def embeddings():
from polyglot.text import Word
data = dict(default_data)
data['message'] = "Neighbours (Embeddings) - Find neighbors of word API - Parameters: 'word', 'lang' language (default: en)"

params = {}

params['word']= request.args.get('word')
Expand Down Expand Up @@ -266,12 +312,35 @@ def polyglot_sentiment():
else:
language = params['lang']


polyglot_text = Text(params['text'], hint_language_code=language)
data['sentiment'] = polyglot_text.polarity
try:
polyglot_text = Text(params['text'], hint_language_code=language)
data['sentiment'] = polyglot_text.polarity
except ZeroDivisionError:
data['sentiment'] = 0
return jsonify(data)


# Lazily-built Hugging Face pipeline, shared by all requests to /trans/sentiment.
_TRANS_SENTIMENT_CLASSIFIER = None


def _get_trans_sentiment_classifier():
    """Return the sentiment-analysis pipeline, building it on first use."""
    global _TRANS_SENTIMENT_CLASSIFIER
    if _TRANS_SENTIMENT_CLASSIFIER is None:
        # Imported here so the server still starts when transformers is absent.
        from transformers import pipeline
        _TRANS_SENTIMENT_CLASSIFIER = pipeline(
            'sentiment-analysis',
            model="distilbert-base-uncased-finetuned-sst-2-english")
    return _TRANS_SENTIMENT_CLASSIFIER


@app.route("/trans/sentiment", methods=['GET','POST'])
def trans_sentiment():
    """Classify the sentiment of the 'text' form field with a transformer model.

    Form parameters (POST):
        text: the text to classify.

    Returns:
        A JSON response built from ``default_data``. On success ``sentiment``
        holds the pipeline output for the single input text. When no form
        data is sent, or 'text' is missing or empty, ``sentiment`` stays an
        empty dict and an ``error`` key explains why.
    """
    data = dict(default_data)
    data['message'] = "Sentiment Analysis API - POST only"
    data['sentiment'] = {}

    params = request.form  # postdata

    if not params:
        data['error'] = 'Missing parameters'
        return jsonify(data)

    # .get() so that a form without 'text' reaches the error branch below
    # instead of aborting on the missing key.
    text = params.get('text')
    if not text:
        data['error'] = 'Text parameter not found'
        return jsonify(data)

    # Reuse the cached pipeline: constructing it loads the model, which is far
    # too slow to repeat on every request.
    # NOTE(review): very long inputs may exceed the model's maximum sequence
    # length -- confirm whether truncation should be enabled here.
    classifier = _get_trans_sentiment_classifier()
    data['sentiment'] = classifier([text])
    return jsonify(data)

@app.route("/polyglot/entities", methods=['GET','POST'])
def polyglot_entities():
from polyglot.text import Text
Expand Down Expand Up @@ -360,7 +429,7 @@ def readability():

data['readability']['title'] = doc.title()
data['readability']['short_title'] = doc.short_title()
#data['readability']['content'] = doc.content()
# data['readability']['content'] = doc.content()
data['readability']['article_html'] = doc.summary( html_partial=True )

soup = BeautifulSoup( data['readability']['article_html'] )
Expand All @@ -378,7 +447,7 @@ def afinn_sentiment():


data['afinn'] = 0
#data['afinn'] = afinn.score('This is utterly excellent!')
# data['afinn'] = afinn.score('This is utterly excellent!')

params = request.form # postdata

Expand Down Expand Up @@ -457,7 +526,7 @@ def newspaper():
data['newspaper']['source_url'] = article.source_url
data['newspaper']['meta_lang'] = article.meta_lang

#Detect language
# Detect language
if len(article.text) > 100:
lang_data = langid.classify( article.title + ' ' + article.text )
data['langid']['language'] = lang_data[0]
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ readability-lxml
BeautifulSoup4
afinn
textblob
transformers
#summa
#pattern
#pattern