import time

import numpy as np
import pandas as pd

# Portuguese month abbreviations used as column headers in the source sheets,
# mapped to their calendar month number.
_MONTH_NUMBERS = {
    'Jan': 1,
    'Fev': 2,
    'Mar': 3,
    'Abr': 4,
    'Mai': 5,
    'Jun': 6,
    'Jul': 7,
    'Ago': 8,
    'Set': 9,
    'Out': 10,
    'Nov': 11,
    'Dez': 12,
}

# Source column name -> output column name.
_COLUMN_RENAMES = {'COMBUSTÍVEL': 'product', 'ESTADO': 'uf', 'UNIDADE': 'unit'}


def monthToNum(shortMonth):
    """Return the month number (1-12) for a Portuguese month abbreviation.

    Parameters
    ----------
    shortMonth : str
        One of 'Jan', 'Fev', 'Mar', 'Abr', 'Mai', 'Jun', 'Jul', 'Ago',
        'Set', 'Out', 'Nov', 'Dez' (exact spelling and case).

    Raises
    ------
    KeyError
        If ``shortMonth`` is not one of the abbreviations above.
    """
    return _MONTH_NUMBERS[shortMonth]


def function(df_in):
    """Reshape a wide sales sheet (one column per month) into long format.

    Each input row (fuel, year, state, unit, one volume column per month,
    TOTAL) becomes one output row per month column. Rows are emitted in
    input order, and within a row in the sheet's month-column order.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Must contain the columns 'COMBUSTÍVEL', 'ANO', 'ESTADO', 'UNIDADE',
        'TOTAL' and at least one month column named with a Portuguese
        abbreviation ('Jan' ... 'Dez'). Any other column (e.g. 'REGIÃO') is
        ignored. The frame is not modified and may carry any index.

    Returns
    -------
    pandas.DataFrame
        Columns ``year_month`` (string ``"<ANO>-<month number>"``, month not
        zero-padded), ``uf``, ``product`` (fuel name with the trailing
        ``" (unit)"`` suffix removed), ``unit``, ``volume`` (float) and
        ``created_at`` (a single timestamp taken when the function runs,
        repeated on every row).

    Raises
    ------
    KeyError
        If a required column is missing or no month column is found.

    Side effects
    ------------
    Prints how many input rows have a TOTAL that differs from the sum of
    their monthly volumes. Missing (NaN) months are skipped in that sum and
    the comparison uses a small floating-point tolerance, so neither gaps in
    the data nor rounding noise are reported as wrong totals.
    """
    # rename() returns a new frame, so the caller's data is never touched.
    df = df_in.rename(columns=_COLUMN_RENAMES)

    id_cols = ['product', 'ANO', 'uf', 'unit']
    missing = [col for col in id_cols + ['TOTAL'] if col not in df.columns]
    if missing:
        raise KeyError(f"missing expected column(s) after renaming: {missing}")

    # Select month columns by name (keeping sheet order) instead of relying on
    # their position between the identifier columns and TOTAL.
    month_cols = [col for col in df.columns if col in _MONTH_NUMBERS]
    if not month_cols:
        raise KeyError("no month columns (Jan..Dez) found in the input frame")

    # Strip the unit suffix from the fuel name: "GASOLINA C (m3)" -> "GASOLINA C".
    df['product'] = df['product'].map(
        lambda name: name.split(' (')[0], na_action='ignore'
    )

    n_rows = len(df)
    n_months = len(month_cols)
    volumes = df[month_cols].to_numpy(dtype=float)  # shape (n_rows, n_months)

    # Sanity check: does each row's TOTAL match the sum of its months?
    totals = df['TOTAL'].to_numpy(dtype=float)
    month_sums = np.nansum(volumes, axis=1)
    totals_match = np.isclose(month_sums, totals, rtol=1e-9, atol=1e-6)
    valores_errados = int(np.count_nonzero(~totals_match))

    # Repeat each identifier row once per month. Positional indexing keeps
    # this correct for any input index (filtered, shuffled, duplicated).
    row_positions = np.repeat(np.arange(n_rows), n_months)
    final = df[id_cols].iloc[row_positions].reset_index(drop=True)

    # Row-major flattening lines up with the repeated identifier rows:
    # row 0 / all months, row 1 / all months, ...
    final['month'] = np.tile([monthToNum(col) for col in month_cols], n_rows)
    final['volume'] = volumes.ravel()

    # Build year_month, e.g. "2000-1".
    final['year_month'] = final['ANO'].astype(str) + '-' + final['month'].astype(str)

    # One load timestamp shared by every row of this call.
    final['created_at'] = pd.Timestamp(time.time(), unit='s')

    print("Quantidade de Totais errados:", valores_errados)

    return final[['year_month', 'uf', 'product', 'unit', 'volume', 'created_at']]
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
COMBUSTÍVELANOREGIÃOESTADOUNIDADEJanFevMarAbrMaiJunJulAgoSetOutNovDezTOTAL
0GASOLINA C (m3)2000REGIÃO NORTERONDÔNIAm39563.26311341.2299369.74610719.98311165.96812312.45111220.97012482.28113591.12211940.57011547.57610818.094136073.253
1GASOLINA C (m3)2000REGIÃO NORTEACREm33065.7583495.2902946.9303023.9203206.9303612.5803264.4603835.7403676.5713225.6103289.7183358.34640001.853
2GASOLINA C (m3)2000REGIÃO NORTEAMAZONASm317615.60420258.20018741.34419604.02320221.67420792.61619912.89821869.33821145.64320633.17520766.91821180.919242742.352
3GASOLINA C (m3)2000REGIÃO NORTERORAIMAm33259.3003636.2163631.5693348.4163394.0164078.6163346.6164029.9004358.5163716.0323200.4003339.33243338.929
4GASOLINA C (m3)2000REGIÃO NORTEPARÁm328830.47932297.04727310.97929396.38426511.00936553.25031807.84031009.97229755.90728661.95128145.78429294.796359575.398
\n", + "
" + ], + "text/plain": [ + " COMBUSTÍVEL ANO REGIÃO ESTADO UNIDADE Jan \\\n", + "0 GASOLINA C (m3) 2000 REGIÃO NORTE RONDÔNIA m3 9563.263 \n", + "1 GASOLINA C (m3) 2000 REGIÃO NORTE ACRE m3 3065.758 \n", + "2 GASOLINA C (m3) 2000 REGIÃO NORTE AMAZONAS m3 17615.604 \n", + "3 GASOLINA C (m3) 2000 REGIÃO NORTE RORAIMA m3 3259.300 \n", + "4 GASOLINA C (m3) 2000 REGIÃO NORTE PARÁ m3 28830.479 \n", + "\n", + " Fev Mar Abr Mai Jun Jul \\\n", + "0 11341.229 9369.746 10719.983 11165.968 12312.451 11220.970 \n", + "1 3495.290 2946.930 3023.920 3206.930 3612.580 3264.460 \n", + "2 20258.200 18741.344 19604.023 20221.674 20792.616 19912.898 \n", + "3 3636.216 3631.569 3348.416 3394.016 4078.616 3346.616 \n", + "4 32297.047 27310.979 29396.384 26511.009 36553.250 31807.840 \n", + "\n", + " Ago Set Out Nov Dez TOTAL \n", + "0 12482.281 13591.122 11940.570 11547.576 10818.094 136073.253 \n", + "1 3835.740 3676.571 3225.610 3289.718 3358.346 40001.853 \n", + "2 21869.338 21145.643 20633.175 20766.918 21180.919 242742.352 \n", + "3 4029.900 4358.516 3716.032 3200.400 3339.332 43338.929 \n", + "4 31009.972 29755.907 28661.951 28145.784 29294.796 359575.398 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oil_df = pd.read_excel('vendas-combustiveis-m3.xls', sheet_name = 'oil_uf_product')\n", + "diesel_df = pd.read_excel('vendas-combustiveis-m3.xls', sheet_name = 'diesel_uf_type')\n", + "\n", + "oil_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quantidade de Totais errados: 216\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
year_monthufproductunitvolumecreated_at
02000-1RONDÔNIAGASOLINA Cm39563.2630002021-08-02 11:54:31.704335213
12000-2RONDÔNIAGASOLINA Cm311341.2290002021-08-02 11:54:31.704335213
22000-3RONDÔNIAGASOLINA Cm39369.7460002021-08-02 11:54:31.704335213
32000-4RONDÔNIAGASOLINA Cm310719.9830002021-08-02 11:54:31.704335213
42000-5RONDÔNIAGASOLINA Cm311165.9680002021-08-02 11:54:31.704335213
.....................
544272020-8DISTRITO FEDERALGLPm315358.4909422021-08-02 11:54:31.704335213
544282020-9DISTRITO FEDERALGLPm313937.4510872021-08-02 11:54:31.704335213
544292020-10DISTRITO FEDERALGLPm3NaN2021-08-02 11:54:31.704335213
544302020-11DISTRITO FEDERALGLPm3NaN2021-08-02 11:54:31.704335213
544312020-12DISTRITO FEDERALGLPm3NaN2021-08-02 11:54:31.704335213
\n", + "

54432 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " year_month uf product unit volume \\\n", + "0 2000-1 RONDÔNIA GASOLINA C m3 9563.263000 \n", + "1 2000-2 RONDÔNIA GASOLINA C m3 11341.229000 \n", + "2 2000-3 RONDÔNIA GASOLINA C m3 9369.746000 \n", + "3 2000-4 RONDÔNIA GASOLINA C m3 10719.983000 \n", + "4 2000-5 RONDÔNIA GASOLINA C m3 11165.968000 \n", + "... ... ... ... ... ... \n", + "54427 2020-8 DISTRITO FEDERAL GLP m3 15358.490942 \n", + "54428 2020-9 DISTRITO FEDERAL GLP m3 13937.451087 \n", + "54429 2020-10 DISTRITO FEDERAL GLP m3 NaN \n", + "54430 2020-11 DISTRITO FEDERAL GLP m3 NaN \n", + "54431 2020-12 DISTRITO FEDERAL GLP m3 NaN \n", + "\n", + " created_at \n", + "0 2021-08-02 11:54:31.704335213 \n", + "1 2021-08-02 11:54:31.704335213 \n", + "2 2021-08-02 11:54:31.704335213 \n", + "3 2021-08-02 11:54:31.704335213 \n", + "4 2021-08-02 11:54:31.704335213 \n", + "... ... \n", + "54427 2021-08-02 11:54:31.704335213 \n", + "54428 2021-08-02 11:54:31.704335213 \n", + "54429 2021-08-02 11:54:31.704335213 \n", + "54430 2021-08-02 11:54:31.704335213 \n", + "54431 2021-08-02 11:54:31.704335213 \n", + "\n", + "[54432 rows x 6 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oil_final = function(oil_df)\n", + "\n", + "oil_final" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "year_month object\n", + "uf object\n", + "product object\n", + "unit object\n", + "volume float64\n", + "created_at datetime64[ns]\n", + "dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "oil_final.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quantidade de Totais errados: 135\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
year_monthufproductunitvolumecreated_at
02013-1RONDÔNIAÓLEO DIESEL S-10m33517.602021-08-02 11:54:42.672039509
12013-2RONDÔNIAÓLEO DIESEL S-10m33681.702021-08-02 11:54:42.672039509
22013-3RONDÔNIAÓLEO DIESEL S-10m34700.672021-08-02 11:54:42.672039509
32013-4RONDÔNIAÓLEO DIESEL S-10m35339.202021-08-02 11:54:42.672039509
42013-5RONDÔNIAÓLEO DIESEL S-10m36166.402021-08-02 11:54:42.672039509
.....................
129552020-8DISTRITO FEDERALÓLEO DIESELm30.002021-08-02 11:54:42.672039509
129562020-9DISTRITO FEDERALÓLEO DIESELm30.002021-08-02 11:54:42.672039509
129572020-10DISTRITO FEDERALÓLEO DIESELm3NaN2021-08-02 11:54:42.672039509
129582020-11DISTRITO FEDERALÓLEO DIESELm3NaN2021-08-02 11:54:42.672039509
129592020-12DISTRITO FEDERALÓLEO DIESELm3NaN2021-08-02 11:54:42.672039509
\n", + "

12960 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " year_month uf product unit volume \\\n", + "0 2013-1 RONDÔNIA ÓLEO DIESEL S-10 m3 3517.60 \n", + "1 2013-2 RONDÔNIA ÓLEO DIESEL S-10 m3 3681.70 \n", + "2 2013-3 RONDÔNIA ÓLEO DIESEL S-10 m3 4700.67 \n", + "3 2013-4 RONDÔNIA ÓLEO DIESEL S-10 m3 5339.20 \n", + "4 2013-5 RONDÔNIA ÓLEO DIESEL S-10 m3 6166.40 \n", + "... ... ... ... ... ... \n", + "12955 2020-8 DISTRITO FEDERAL ÓLEO DIESEL m3 0.00 \n", + "12956 2020-9 DISTRITO FEDERAL ÓLEO DIESEL m3 0.00 \n", + "12957 2020-10 DISTRITO FEDERAL ÓLEO DIESEL m3 NaN \n", + "12958 2020-11 DISTRITO FEDERAL ÓLEO DIESEL m3 NaN \n", + "12959 2020-12 DISTRITO FEDERAL ÓLEO DIESEL m3 NaN \n", + "\n", + " created_at \n", + "0 2021-08-02 11:54:42.672039509 \n", + "1 2021-08-02 11:54:42.672039509 \n", + "2 2021-08-02 11:54:42.672039509 \n", + "3 2021-08-02 11:54:42.672039509 \n", + "4 2021-08-02 11:54:42.672039509 \n", + "... ... \n", + "12955 2021-08-02 11:54:42.672039509 \n", + "12956 2021-08-02 11:54:42.672039509 \n", + "12957 2021-08-02 11:54:42.672039509 \n", + "12958 2021-08-02 11:54:42.672039509 \n", + "12959 2021-08-02 11:54:42.672039509 \n", + "\n", + "[12960 rows x 6 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diesel_final = function(diesel_df)\n", + "\n", + "diesel_final" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "year_month object\n", + "uf object\n", + "product object\n", + "unit object\n", + "volume float64\n", + "created_at datetime64[ns]\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diesel_final.dtypes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}