import time

import numpy as np
import pandas as pd

# Portuguese three-letter month abbreviations (the month column headers of
# the input table) mapped to their calendar month number.
_MONTH_NUMBERS = {
    'Jan': 1,
    'Fev': 2,
    'Mar': 3,
    'Abr': 4,
    'Mai': 5,
    'Jun': 6,
    'Jul': 7,
    'Ago': 8,
    'Set': 9,
    'Out': 10,
    'Nov': 11,
    'Dez': 12,
}


def monthToNum(shortMonth):
    """Return the month number (1-12) for a Portuguese month abbreviation.

    Args:
        shortMonth: One of 'Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul',
            'Ago', 'Set', 'Out', 'Nov', 'Dez'.

    Returns:
        The matching integer month number.

    Raises:
        KeyError: If ``shortMonth`` is not one of the abbreviations above.
    """
    return _MONTH_NUMBERS[shortMonth]


def function(df_in):
    """Reshape a wide "one column per month" table into one row per month.

    The input is expected to carry the columns 'COMBUSTÍVEL', 'ANO',
    'ESTADO', 'UNIDADE', 'TOTAL' and one column per month named with the
    abbreviations accepted by ``monthToNum``. Any other column (for example
    'REGIÃO') is ignored. The input frame is not modified.

    Rows are emitted in the input's row order, and within each row in the
    order in which the month columns appear in the input.

    Args:
        df_in: Wide ``pandas.DataFrame`` as described above. Its index may
            be arbitrary; it is not used.

    Returns:
        A new ``pandas.DataFrame`` with the columns 'year_month'
        ("<ANO>-<month number>", not zero padded), 'uf', 'product' (fuel
        name with any trailing " (...)" suffix removed), 'unit', 'volume'
        and 'created_at' (a single timestamp taken when the function runs).

    Raises:
        KeyError: If a required column is missing.
        ValueError: If the input has no month columns at all.

    Side effects:
        Prints how many input rows have a 'TOTAL' that does not match the
        sum of their monthly values.
    """
    column_names_to_change = {
        'COMBUSTÍVEL': 'product',
        'ESTADO': 'uf',
        'UNIDADE': 'unit',
    }
    id_cols = ['product', 'ANO', 'uf', 'unit']

    # rename() returns a new frame, so the caller's data stays untouched.
    renamed = df_in.rename(columns=column_names_to_change)

    # Select month columns by name rather than by position so that extra or
    # reordered columns cannot be mistaken for months.
    month_cols = [col for col in renamed.columns if col in _MONTH_NUMBERS]
    if not month_cols:
        raise ValueError("input has no month columns (expected 'Jan' .. 'Dez')")

    wide = renamed[id_cols + month_cols].copy()

    # Keep only the fuel name: drop everything from the first ' (' onwards.
    wide['product'] = wide['product'].map(lambda name: name.split(' (')[0])

    # Compare the monthly sum against the reported TOTAL with a tolerance.
    # Missing months (NaN) are skipped: otherwise a single NaN would turn the
    # sum into NaN and flag the row as wrong regardless of its real values.
    monthly_sum = wide[month_cols].astype(float).sum(axis=1, skipna=True)
    reported_total = renamed['TOTAL'].astype(float)
    totals_match = np.isclose(
        monthly_sum.to_numpy(), reported_total.to_numpy(), rtol=1e-9, atol=1e-6
    )
    valores_errados = int((~totals_match).sum())

    # Remember each row's position so the long table can be put back into
    # row-major order after melting (melt groups by month column first).
    wide['_row'] = np.arange(len(wide))
    long = wide.melt(
        id_vars=id_cols + ['_row'],
        value_vars=month_cols,
        var_name='month',
        value_name='volume',
    )
    # A stable sort keeps the month-column order inside every input row.
    long = long.sort_values('_row', kind='mergesort').reset_index(drop=True)

    # Month abbreviation -> month number, then build the "<year>-<month>" key.
    long['month'] = long['month'].map(_MONTH_NUMBERS)
    long['year_month'] = [
        f"{year}-{month}" for year, month in zip(long['ANO'], long['month'])
    ]

    # One shared load timestamp for every row produced by this call.
    long['created_at'] = pd.Timestamp(time.time(), unit='s')

    print("Quantidade de Totais errados:", valores_errados)

    return long[['year_month', 'uf', 'product', 'unit', 'volume', 'created_at']]
| \n", + " | COMBUSTÍVEL | \n", + "ANO | \n", + "REGIÃO | \n", + "ESTADO | \n", + "UNIDADE | \n", + "Jan | \n", + "Fev | \n", + "Mar | \n", + "Abr | \n", + "Mai | \n", + "Jun | \n", + "Jul | \n", + "Ago | \n", + "Set | \n", + "Out | \n", + "Nov | \n", + "Dez | \n", + "TOTAL | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "GASOLINA C (m3) | \n", + "2000 | \n", + "REGIÃO NORTE | \n", + "RONDÔNIA | \n", + "m3 | \n", + "9563.263 | \n", + "11341.229 | \n", + "9369.746 | \n", + "10719.983 | \n", + "11165.968 | \n", + "12312.451 | \n", + "11220.970 | \n", + "12482.281 | \n", + "13591.122 | \n", + "11940.570 | \n", + "11547.576 | \n", + "10818.094 | \n", + "136073.253 | \n", + "
| 1 | \n", + "GASOLINA C (m3) | \n", + "2000 | \n", + "REGIÃO NORTE | \n", + "ACRE | \n", + "m3 | \n", + "3065.758 | \n", + "3495.290 | \n", + "2946.930 | \n", + "3023.920 | \n", + "3206.930 | \n", + "3612.580 | \n", + "3264.460 | \n", + "3835.740 | \n", + "3676.571 | \n", + "3225.610 | \n", + "3289.718 | \n", + "3358.346 | \n", + "40001.853 | \n", + "
| 2 | \n", + "GASOLINA C (m3) | \n", + "2000 | \n", + "REGIÃO NORTE | \n", + "AMAZONAS | \n", + "m3 | \n", + "17615.604 | \n", + "20258.200 | \n", + "18741.344 | \n", + "19604.023 | \n", + "20221.674 | \n", + "20792.616 | \n", + "19912.898 | \n", + "21869.338 | \n", + "21145.643 | \n", + "20633.175 | \n", + "20766.918 | \n", + "21180.919 | \n", + "242742.352 | \n", + "
| 3 | \n", + "GASOLINA C (m3) | \n", + "2000 | \n", + "REGIÃO NORTE | \n", + "RORAIMA | \n", + "m3 | \n", + "3259.300 | \n", + "3636.216 | \n", + "3631.569 | \n", + "3348.416 | \n", + "3394.016 | \n", + "4078.616 | \n", + "3346.616 | \n", + "4029.900 | \n", + "4358.516 | \n", + "3716.032 | \n", + "3200.400 | \n", + "3339.332 | \n", + "43338.929 | \n", + "
| 4 | \n", + "GASOLINA C (m3) | \n", + "2000 | \n", + "REGIÃO NORTE | \n", + "PARÁ | \n", + "m3 | \n", + "28830.479 | \n", + "32297.047 | \n", + "27310.979 | \n", + "29396.384 | \n", + "26511.009 | \n", + "36553.250 | \n", + "31807.840 | \n", + "31009.972 | \n", + "29755.907 | \n", + "28661.951 | \n", + "28145.784 | \n", + "29294.796 | \n", + "359575.398 | \n", + "
| \n", + " | year_month | \n", + "uf | \n", + "product | \n", + "unit | \n", + "volume | \n", + "created_at | \n", + "
|---|---|---|---|---|---|---|
| 0 | \n", + "2000-1 | \n", + "RONDÔNIA | \n", + "GASOLINA C | \n", + "m3 | \n", + "9563.263000 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 1 | \n", + "2000-2 | \n", + "RONDÔNIA | \n", + "GASOLINA C | \n", + "m3 | \n", + "11341.229000 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 2 | \n", + "2000-3 | \n", + "RONDÔNIA | \n", + "GASOLINA C | \n", + "m3 | \n", + "9369.746000 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 3 | \n", + "2000-4 | \n", + "RONDÔNIA | \n", + "GASOLINA C | \n", + "m3 | \n", + "10719.983000 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 4 | \n", + "2000-5 | \n", + "RONDÔNIA | \n", + "GASOLINA C | \n", + "m3 | \n", + "11165.968000 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 54427 | \n", + "2020-8 | \n", + "DISTRITO FEDERAL | \n", + "GLP | \n", + "m3 | \n", + "15358.490942 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 54428 | \n", + "2020-9 | \n", + "DISTRITO FEDERAL | \n", + "GLP | \n", + "m3 | \n", + "13937.451087 | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 54429 | \n", + "2020-10 | \n", + "DISTRITO FEDERAL | \n", + "GLP | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 54430 | \n", + "2020-11 | \n", + "DISTRITO FEDERAL | \n", + "GLP | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
| 54431 | \n", + "2020-12 | \n", + "DISTRITO FEDERAL | \n", + "GLP | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:31.704335213 | \n", + "
54432 rows × 6 columns
\n", + "| \n", + " | year_month | \n", + "uf | \n", + "product | \n", + "unit | \n", + "volume | \n", + "created_at | \n", + "
|---|---|---|---|---|---|---|
| 0 | \n", + "2013-1 | \n", + "RONDÔNIA | \n", + "ÓLEO DIESEL S-10 | \n", + "m3 | \n", + "3517.60 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 1 | \n", + "2013-2 | \n", + "RONDÔNIA | \n", + "ÓLEO DIESEL S-10 | \n", + "m3 | \n", + "3681.70 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 2 | \n", + "2013-3 | \n", + "RONDÔNIA | \n", + "ÓLEO DIESEL S-10 | \n", + "m3 | \n", + "4700.67 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 3 | \n", + "2013-4 | \n", + "RONDÔNIA | \n", + "ÓLEO DIESEL S-10 | \n", + "m3 | \n", + "5339.20 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 4 | \n", + "2013-5 | \n", + "RONDÔNIA | \n", + "ÓLEO DIESEL S-10 | \n", + "m3 | \n", + "6166.40 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 12955 | \n", + "2020-8 | \n", + "DISTRITO FEDERAL | \n", + "ÓLEO DIESEL | \n", + "m3 | \n", + "0.00 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 12956 | \n", + "2020-9 | \n", + "DISTRITO FEDERAL | \n", + "ÓLEO DIESEL | \n", + "m3 | \n", + "0.00 | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 12957 | \n", + "2020-10 | \n", + "DISTRITO FEDERAL | \n", + "ÓLEO DIESEL | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 12958 | \n", + "2020-11 | \n", + "DISTRITO FEDERAL | \n", + "ÓLEO DIESEL | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
| 12959 | \n", + "2020-12 | \n", + "DISTRITO FEDERAL | \n", + "ÓLEO DIESEL | \n", + "m3 | \n", + "NaN | \n", + "2021-08-02 11:54:42.672039509 | \n", + "
12960 rows × 6 columns
\n", + "