diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..90cdc7e9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.mypy_cache/* +__pycache__/* diff --git a/SortingComparison.py b/SortingComparison.py deleted file mode 100644 index e4a86bcd..00000000 --- a/SortingComparison.py +++ /dev/null @@ -1,110 +0,0 @@ -from timeit import default_timer as timer -import random - - -def insertion_sort(list_to_sort): - for step in range(1, len(list_to_sort)): - key = list_to_sort[step] - j = step - 1 - while j >= 0 and list_to_sort[j] > key: - list_to_sort[j + 1] = list_to_sort[j] - j -= 1 - list_to_sort[j + 1] = key - - -def quick_sort(list_to_sort): - def partition(sub_list, low, hi): - pivot = sub_list[(low + hi) // 2] - left = low - right = hi - while left <= right: - while sub_list[left] < pivot: - left += 1 - while sub_list[right] > pivot: - right -= 1 - if left <= right: - sub_list[left], sub_list[right] = sub_list[right], sub_list[left] - left += 1 - right -= 1 - return left, right - - def quick_sort_fun(list_to_sort, low, hi): - if low < hi: - left, right = partition(list_to_sort, low, hi) - quick_sort_fun(list_to_sort, low, right) - quick_sort_fun(list_to_sort, left, hi) - - quick_sort_fun(list_to_sort, 0, len(list_to_sort) - 1) - - -# shell sort using Knuth's sequence -def shell_sort(list_to_sort): - def sublist_sort(list_to_sort, start_index, gap): - for i in range(start_index + gap, len(list_to_sort), gap): - current_val = list_to_sort[i] - index = i - while index >= gap and list_to_sort[index - gap] > current_val: - list_to_sort[index] = list_to_sort[index - gap] - index -= gap - list_to_sort[index] = current_val - - n = len(list_to_sort) - gap = 1 - while gap < n // 3: - gap = 3 * gap + 1 - while gap > 0: - for i in range(gap): - sublist_sort(list_to_sort, i, gap) - gap //= 3 - - -def heap_sort(list_to_sort): - def heapify(list_to_sort, n, i): - largest_index = i - left_index = 2 * i + 1 - right_index = 2 * i + 2 - - if left_index < n and list_to_sort[i] < list_to_sort[left_index]: - largest_index = left_index - if right_index < n and list_to_sort[largest_index] < list_to_sort[right_index]: - largest_index = right_index - if largest_index != i: - list_to_sort[i], list_to_sort[largest_index] = list_to_sort[largest_index], list_to_sort[i] - heapify(list_to_sort, n, largest_index) - - n = len(list_to_sort) - for i in range(n, -1, -1): - heapify(list_to_sort, n, i) - for i in range(n - 1, 0, -1): - list_to_sort[i], list_to_sort[0] = list_to_sort[0], list_to_sort[i] - heapify(list_to_sort, i, 0) - - -def benchmark_sorting_algorithms(functions_list, list_to_sort): - print("BENCHMARK START (Times in ms)") - - def benchmark_one_function(function_to_benchmark, *arguments): - start = timer() - function_to_benchmark(*arguments) - end = timer() - print("{0}: {1}".format(function_to_benchmark.__name__, (end - start) * 1000)) - - for function in functions_list: - benchmark_one_function(function, list_to_sort.copy()) - - -def main(): - list_to_sort_1 = [random.randint(-10000, 10000) for _ in range(10000)] - list_to_sort_2 = list(range(10000)) - list_to_sort_3 = list(range(10000, 0, -1)) - - print("Random list") - benchmark_sorting_algorithms([quick_sort, heap_sort, shell_sort, insertion_sort], list_to_sort_1) - print("Increasing list") - benchmark_sorting_algorithms([quick_sort, heap_sort, shell_sort, insertion_sort], list_to_sort_2) - print("Decreasing list") - benchmark_sorting_algorithms([quick_sort, heap_sort, shell_sort, insertion_sort], list_to_sort_3) - - -if __name__ == "__main__": - main() diff --git a/prediction_algorithm.py b/prediction_algorithm.py new file mode 100644 index 00000000..40f381e0 --- /dev/null +++ b/prediction_algorithm.py @@ -0,0 +1,235 @@ +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import train_test_split, cross_val_score +import matplotlib.pyplot as plt +from sklearn.neighbors import KNeighborsRegressor +from sklearn.metrics import mean_squared_error +from sklearn.metrics import mean_absolute_error +from random import randrange +import requests as req + +norm = {} + + +def fetch_data(currency, startTime, endTime): + limit = min(1000, int((endTime - startTime) / 86400 + 1)) + data = req.get( + "https://www.bitstamp.net/api/v2/ohlc/{0}/".format(currency), + params={ + "start": startTime - 86400, + "end": endTime, + "step": 86400, + "limit": limit, + }, + ) + return data.json() + + +def parse_data(data): + global norm + df = pd.DataFrame.from_records(data["data"]["ohlc"]) + df.set_index("timestamp", inplace=True) + df = df.apply(pd.to_numeric, errors="coerce") + df["ratio"] = df["close"] / df["volume"] + norm = { + "close": [df["close"].min(), df["close"].max()], + "volume": [df["volume"].min(), df["volume"].max()], + "ratio": [df["ratio"].min(), df["ratio"].max()], + } + df["close"] = (df["close"] - df["close"].min()) / ( + df["close"].max() - df["close"].min() + ) + df["volume"] = (df["volume"] - df["volume"].min()) / ( + df["volume"].max() - df["volume"].min() + ) + df["ratio"] = (df["ratio"] - df["ratio"].min()) / ( + df["ratio"].max() - df["ratio"].min() + ) + df["price_diff"] = df["close"].diff() + df["volume_diff"] = df["volume"].diff() + return df + + +def denormalize(value): + price = value[0] * (norm["close"][1] - norm["close"][0]) + norm["close"][0] + volume = value[1] * (norm["volume"][1] - norm["volume"][0]) + norm["volume"][0] + return [price, volume] + + +def normalize(value): + price = (value[0] - norm["close"][0]) / (norm["close"][1] - norm["close"][0]) + volume = (value[1] - norm["volume"][0]) / (norm["volume"][1] - norm["volume"][0]) + return [price, volume] + + +def calc_ratio(value): + value_denormalized = denormalize(value) + ratio = value_denormalized[0] / value_denormalized[1] + ratio = (ratio - norm["ratio"][0]) / (norm["ratio"][1] - norm["ratio"][0]) + return ratio + + +def predict_next_day(last_value, knn): + next_day = knn.predict(last_value) + previous_day = last_value[0][:2] + diffs = [] + diffs.append(next_day[0][0] - previous_day[0]) + diffs.append(next_day[0][1] - previous_day[1]) + diffs.append(calc_ratio([next_day[0][0], next_day[0][1]])) + next_day = np.append(next_day, diffs) + return np.reshape(next_day, (-1, 1)).T + + +def plot(y_test, y_, y_future, title): + fig, ax1 = plt.subplots(figsize=(30, 10)) + + y_denorm = np.apply_along_axis(denormalize, 1, y_) + y_test_denorm = np.apply_along_axis(denormalize, 1, y_test) + + ax1.plot( + np.arange(len(y_denorm)), y_denorm[:, [0]], color="navy", label="predictionTest" + ) + ax1.plot( + np.arange(len(y_test_denorm)), + y_test_denorm[:, [0]], + color="orange", + label="realValue", + ) + ax1.plot( + range(len(y_denorm), len(y_denorm) + len(y_future)), + np.array(y_future)[:, [0]], + color="green", + label="predictionFutur", + ) + ax1.plot( + [len(y_denorm) - 1, len(y_denorm)], + [y_denorm[-1, [0]], np.array(y_future)[0, [0]]], + color="green", + ) + ax2 = ax1.twinx() + ax2.set_xlabel("date") + ax2.set_ylabel("volume") + ax2.tick_params(axis="y") + + ax2.bar( + np.arange(len(y_denorm)), + np.array(y_denorm[:, [1]]).ravel(), + alpha=0.5, + width=0.3, + ) + ax2.bar( + range(len(y_denorm), len(y_denorm) + len(y_future)), + np.array(y_future)[:, [1]].ravel(), + alpha=0.5, + width=0.3, + ) + ax2.bar(0, 25e10, alpha=0) + + plt.axis("tight") + ax1.legend() + plt.title(title) + plt.show() + + +def future_values(next_days, X_val, knn): + y_sim = [] + next_day_value = predict_next_day(X_val, knn) + y_sim.append(denormalize(next_day_value.tolist()[0][:2])) + + for i in range(0, next_days): + next_day_value = predict_next_day(next_day_value, knn) + y_sim.append(denormalize(next_day_value.tolist()[0][:2])) + + return y_sim + + +def simulate(iterations, neighbors, future_predictions, currency, start, end): + # data split + data = fetch_data(currency, start, end) + df = parse_data(data) + + X = np.array(df[["close", "volume", "price_diff", "volume_diff", "ratio"]])[1:] + y = np.array(df[["close", "volume"]])[1:] + + random = randrange(100000000) + + knn = KNeighborsRegressor(neighbors) + knn.fit(X, y) + + y_ = knn.predict(X[-int(1 - len(y) * 0.8) :]) + + mse = mean_squared_error(y[-int(1 - len(y) * 0.8) :], y_) + mae = mean_absolute_error(y[-int(1 - len(y) * 0.8) :], y_) + + y_future = np.array(future_values(future_predictions, X[[-1]], knn)) + + plot( + y[-int(1 - len(y) * 0.8) :], + y_, + y_future, + "Real prices 1 simulation without noise", + ) + + y_mean = y_ + y_test_mean = y[-int(1 - len(y) * 0.8) :] + + print( + f"Mean squared error: {mse} Mean absolute error:{mae} Random seed: {random}" + ) + print( + f"Mean Close: {np.mean(y_future, axis=(0))[0]} Mean Volume: {np.mean(y_future, axis=(0))[1]}\n" + + f"Median Close: {np.median(y_future, axis=(0))[0]} Median Volume: {np.mean(y_future, axis=(0))[1]}\n" + + f"Std Close: {np.std(y_future, axis=(0))[0]} Std Volume: {np.mean(y_future, axis=(0))[1]}\n" + ) + + mse = 0 + mae = 0 + print("\n") + + for i in range(0, iterations - 1): + + # Reset + + X_train = X[: int(len(X) * 0.8)] + X_test = X[: int(1 - len(X) * 0.8)] + y_train = y[: int(len(y) * 0.8)] + y_test = y[: int(1 - len(y) * 0.8)] + + # Noise + + X_train += X_train * np.random.normal(0, 0.02, size=(len(X_train), 5)) + X_test += X_test * np.random.normal(0, 0.02, size=(len(X_test), 5)) + y_train += y_train * np.random.normal(0, 0.02, size=(len(y_train), 2)) + y_test += y_test * np.random.normal(0, 0.02, size=(len(y_test), 2)) + + knn.fit(X_train, y_train) + y_ = knn.predict(X_test) + + mse += mean_squared_error(y_test, y_) + mae += mean_absolute_error(y_test, y_) + + y_mean += y_ + y_test_mean += y_test + y_future += np.array(future_values(future_predictions, X_test[[-1]], knn)) + + y_mean /= iterations + y_test_mean /= iterations + y_future /= iterations + + print( + f"Mean squared error: {mse/iterations} Mean absolute error:{mae/iterations}" + ) + plot( + y_mean, y_test_mean, y_future, "Mean prices from 100 simulations with noise", + ) + print( + f"Mean Close: {np.mean(y_future, axis=(0))[0]} Mean Volume: {np.mean(y_future, axis=(0))[1]}\n" + + f"Median Close: {np.median(y_future, axis=(0))[0]} Median Volume: {np.mean(y_future, axis=(0))[1]}\n" + + f"Std Close: {np.std(y_future, axis=(0))[0]} Std Volume: {np.mean(y_future, axis=(0))[1]}\n" + ) + + +# simulate(neighbors, iterations, days for prediction, currency , timestamp start, timestamp end) +if __name__ == "__main__": + simulate(8, 100, 10, "ethusd", 1551705072, 1591705072) diff --git a/searching_methods/src/algorithms/bubble.java b/searching_methods/src/algorithms/bubble.java deleted file mode 100644 index bd80ecee..00000000 --- a/searching_methods/src/algorithms/bubble.java +++ /dev/null @@ -1,28 +0,0 @@ -package algorithms; - -import java.util.ArrayList; - -public class bubble extends sorting_algorithm -{ - public bubble() - { - super(); - } - - public bubble(ArrayList source) - { - super(source); - } - - @Override - public ArrayList sort_out() - { - int size = to_be_sorted.size(); - for(int i = 0; i < size; i++) - for(int j = 0; j < size - 1; j++) - if(to_be_sorted.get(j) > to_be_sorted.get(j + 1)) - swap(j, j + 1); - - return to_be_sorted; - } -} diff --git a/searching_methods/src/algorithms/insert.java b/searching_methods/src/algorithms/insert.java deleted file mode 100644 index 4e81792c..00000000 --- a/searching_methods/src/algorithms/insert.java +++ /dev/null @@ -1,37 +0,0 @@ -package algorithms; - -import java.util.ArrayList; - -public class insert extends sorting_algorithm -{ - public insert() - { - super(); - } - - public insert(ArrayList to_be_sorted) - { - super(to_be_sorted); - } - - @Override - public ArrayList sort_out() - { - int size = to_be_sorted.size(); - for (int i = 1; i < size; i++) - { - int key = to_be_sorted.get(i); - int j = i - 1; - - while (j >= 0 && to_be_sorted.get(j) > key) - { - to_be_sorted.set(j + 1, to_be_sorted.get(j)); - j = j - 1; - } - - to_be_sorted.set(j + 1, key); - } - - return to_be_sorted; - } -} diff --git a/searching_methods/src/algorithms/quick.java b/searching_methods/src/algorithms/quick.java deleted file mode 100644 index b1b7ea75..00000000 --- a/searching_methods/src/algorithms/quick.java +++ /dev/null @@ -1,44 +0,0 @@ -package algorithms; - -import java.util.ArrayList; - -public class quick extends sorting_algorithm -{ - public quick() - { - super(); - } - - public quick(ArrayList source) - { - super(source); - } - - @Override - public ArrayList sort_out() - { - sorting_procedure(0, to_be_sorted.size() - 1); - - return to_be_sorted; - } - - void sorting_procedure(int low, int high) - { - int pivot = to_be_sorted.get((low + high) / 2); - int i = low, j = high; - - do{ - while (to_be_sorted.get(i) < pivot) i++; - while (to_be_sorted.get(j) > pivot) j--; - if (i <= j) - { - swap(i, j); - i++; - j--; - } - } while (i <= j); - - if (j > low) sorting_procedure(low, j); - if (i < high) sorting_procedure(i, high); - } -} diff --git a/searching_methods/src/algorithms/select.java b/searching_methods/src/algorithms/select.java deleted file mode 100644 index 2b5ac8f9..00000000 --- a/searching_methods/src/algorithms/select.java +++ /dev/null @@ -1,38 +0,0 @@ -package algorithms; - -import java.util.ArrayList; - -public class select extends sorting_algorithm -{ - public select() - { - super(); - } - - public select(ArrayList source) - { - super(source); - } - - @Override - public ArrayList sort_out() - { - for (int i = to_be_sorted.size(); i >= 2; i--) - { - int max = max_element_index(i); - if (max != i - 1) - swap(i - 1, max); - } - - return to_be_sorted; - } - - int max_element_index(int limit) - { - int max = 0; - for (int i = 1; i < limit; i++) - if (to_be_sorted.get(i) > to_be_sorted.get(max)) - max = i; - return max; - } -} diff --git a/searching_methods/src/algorithms/sorting_algorithm.java b/searching_methods/src/algorithms/sorting_algorithm.java deleted file mode 100644 index 34f95675..00000000 --- a/searching_methods/src/algorithms/sorting_algorithm.java +++ /dev/null @@ -1,38 +0,0 @@ -package algorithms; - -import java.util.ArrayList; - -public abstract class sorting_algorithm -{ - ArrayList to_be_sorted; - - sorting_algorithm() - { - this.to_be_sorted = new ArrayList<>(); - } - - sorting_algorithm(ArrayList to_be_sorted) - { - this.to_be_sorted = new ArrayList<>(to_be_sorted.size()); - this.to_be_sorted.addAll(to_be_sorted); - } - public void change_source(ArrayList to_be_sorted) - { - this.to_be_sorted = new ArrayList<>(to_be_sorted.size()); - this.to_be_sorted.addAll(to_be_sorted); - } - - void swap(int index_1, int index_2) - { - int temp = to_be_sorted.get(index_1); - to_be_sorted.set(index_1, to_be_sorted.get(index_2)); - to_be_sorted.set(index_2, temp); - } - public ArrayList sort_out(ArrayList to_be_sorted) - { - change_source(to_be_sorted); - return sort_out(); - } - - abstract public ArrayList sort_out(); -} diff --git a/searching_methods/src/testing/main.java b/searching_methods/src/testing/main.java deleted file mode 100644 index 14206c2f..00000000 --- a/searching_methods/src/testing/main.java +++ /dev/null @@ -1,56 +0,0 @@ -package testing; - -import algorithms.*; - -import java.util.ArrayList; -import java.util.Random; - -public class main -{ - static Random seed = new Random(); - static ArrayList randomize_new_array(int size) - { - ArrayList new_array = new ArrayList(size); - for (int i = 0; i < size; i++) new_array.add(seed.nextInt()); - return new_array; - } - - static void print_array_out(ArrayList array) - { - for (Integer integer : array) - System.out.println(integer); - - System.out.println("๐Ÿงถ-----------------------๐Ÿˆ"); - } - - static float estimate_sorting_duration(sorting_algorithm algorithm, ArrayList> arrays) - { - if(arrays.isEmpty()) return -1; - - long start_time = System.currentTimeMillis(); - - for (ArrayList array : arrays) - algorithm.sort_out(array); - - return (float)(System.currentTimeMillis() - start_time) / arrays.size(); - } - - public static void main(String args[]) - { - int number_of_tests = 10, test_size = 10000; - ArrayList> set_of_tests = new ArrayList>(number_of_tests); - - for (int i = 0; i < number_of_tests; i++) - set_of_tests.add(randomize_new_array(test_size)); - - bubble bubble_sort = new bubble(); - insert insert_sort = new insert(); - select select_sort = new select(); - quick quick_sort = new quick(); - - System.out.println(estimate_sorting_duration(bubble_sort, set_of_tests) + " [ms]"); - System.out.println(estimate_sorting_duration(insert_sort, set_of_tests) + " [ms]"); - System.out.println(estimate_sorting_duration(select_sort, set_of_tests) + " [ms]"); - System.out.println(estimate_sorting_duration(quick_sort, set_of_tests) + " [ms]"); - } -}