From dbddf0bdc4cd3e88d4ab6d664671dc51626979f6 Mon Sep 17 00:00:00 2001
From: Magdalena
Date: Sun, 18 Aug 2019 07:53:27 +0200
Subject: [PATCH] add docstrings (NumPy style) to functions in utils

---
 multi_rake/utils.py | 54 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/multi_rake/utils.py b/multi_rake/utils.py
index 1ad8079..24cb94e 100644
--- a/multi_rake/utils.py
+++ b/multi_rake/utils.py
@@ -11,6 +11,20 @@
 
 
 def detect_language(text, proba_threshold):
+    """Detect the language code of the input text with cld2.
+
+    Parameters
+    ----------
+    text : str
+        Text whose language should be detected.
+    proba_threshold : float
+        Minimum cld2 probability required to accept the detected language code.
+
+    Returns
+    -------
+    str or None
+        Language code detected by cld2; None if below proba_threshold.
+    """
     _, _, details = cld2.detect(text)
 
     language_code = details[0].language_code
@@ -20,11 +34,35 @@
         return language_code
 
 
-def keep_only_letters(string):
-    return ' '.join(token.group() for token in LETTERS_RE.finditer(string))
+def keep_only_letters(text):
+    """Apply a regex to keep only letters.
+
+    Parameters
+    ----------
+    text : str
+        Text to extract letters from.
+
+    Returns
+    -------
+    str
+        Input text cleaned by the regex so that only letters remain.
+    """
+    return ' '.join(token.group() for token in LETTERS_RE.finditer(text))
 
 
 def separate_words(text):
+    """Separate text into tokens by whitespace and dismiss numeric tokens.
+
+    Parameters
+    ----------
+    text : str
+        Text to tokenize.
+
+    Returns
+    -------
+    list of str
+        Tokenized text.
+    """
     words = []
 
     for word in text.split():
@@ -35,5 +73,17 @@
 
 
 def split_sentences(text):
+    """Split text into sentences at custom regex boundaries.
+
+    Parameters
+    ----------
+    text : str
+        Text to split on sentence delimiters.
+
+    Returns
+    -------
+    list of str
+        Text split into sentences.
+    """
     sentences = SENTENCE_DELIMITERS_RE.split(text)
     return sentences
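
A minimal usage sketch of the four documented helpers, assuming multi_rake is installed so that multi_rake.utils is importable with the signatures shown in the diff; the sample text and the 0.95 threshold are illustrative, and exact results depend on LETTERS_RE, SENTENCE_DELIMITERS_RE, and the installed cld2 version.

    from multi_rake.utils import (
        detect_language,
        keep_only_letters,
        separate_words,
        split_sentences,
    )

    text = 'Compatibility of systems of linear constraints over natural numbers.'

    # Language code such as 'en' when cld2 is confident enough; the docstring
    # notes that a result below proba_threshold is not accepted.
    code = detect_language(text, proba_threshold=0.95)

    # Split the text on the custom sentence-delimiter regex.
    sentences = split_sentences(text)

    # Keep only letters, then tokenize by whitespace, dismissing numeric tokens.
    cleaned = keep_only_letters(text)
    words = separate_words(cleaned)

    print(code, sentences, words)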