5. Slopes analysis, SOM…

# common 
import sys
import os
import os.path as op

# basic 
import xarray as xr
import numpy as np
import pandas as pd
from datetime import timedelta as td
from matplotlib import pyplot as plt
from pandas.plotting import register_matplotlib_converters

# interactive
from ipywidgets import interactive

# advanced
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import dash  # (version 1.12.0) pip install dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# warnings
import warnings
warnings.filterwarnings("ignore")

# dev library 
sys.path.insert(0, os.getcwd())

# slopes module 
from slopes import Slopes_SOM
from slopes import consult
# plotly renderer for jupyter book
import plotly.io as pio
pio.renderers.default = "notebook"
# load the data
data = pd.read_pickle(op.join(os.getcwd(), '..', 'data', 'reconstructed', 
                              'surfbreaks_reconstructed_final.pkl'))
# cap the raw index at 1, then rescale it to a 0-10 scale
data['Index'] = data['Index'].where(data['Index']<1, 1) * 10
data = data.dropna(how='any', axis=0)
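
The `where` call keeps index values below 1 and replaces everything else with 1, so after the `* 10` rescale the final `Index` lies on a 0-10 scale. A minimal sketch of the transform with hypothetical values:

# hypothetical values illustrating the cap-and-rescale above
raw = pd.Series([0.25, 0.80, 1.70])
capped = raw.where(raw < 1, 1) * 10   # where keeps values < 1, replaces the rest with 1
print(capped.tolist())                # [2.5, 8.0, 10.0]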

data.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1008091 entries, 1979-02-01 02:00:00 to 2020-02-29 20:00:00
Data columns (total 27 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   Hs               1008091 non-null  float64
 1   Tp               1008091 non-null  float64
 2   Dir              1008091 non-null  float64
 3   Spr              1008091 non-null  float64
 4   W                1008091 non-null  float64
 5   DirW             1008091 non-null  float64
 6   DDir             1008091 non-null  float64
 7   DDirW            1008091 non-null  float64
 8   ocean_tide       1008091 non-null  float64
 9   Omega            1008091 non-null  float64
 10  H_break          1008091 non-null  float64
 11  DDir_R           1008091 non-null  float64
 12  Slope            1008091 non-null  float64
 13  Iribarren        1008091 non-null  float64
 14  Hb_index         1008091 non-null  float64
 15  Tp_index         1008091 non-null  float64
 16  Spr_index        1008091 non-null  float64
 17  Iribarren_index  1008091 non-null  float64
 18  Dir_index        1008091 non-null  float64
 19  DirW_index       1008091 non-null  float64
 20  Index            1008091 non-null  float64
 21  Hour             1008091 non-null  int64  
 22  Day_Moment       1008091 non-null  object 
 23  Month            1008091 non-null  int64  
 24  Season           1008091 non-null  object 
 25  Year             1008091 non-null  int64  
 26  beach            1008091 non-null  object 
dtypes: float64(21), int64(3), object(3)
memory usage: 215.4+ MB
data.Index.plot.box()
[Figure: box plot of data.Index — slopes_analysis_notebook_4_1.png]
data.Hb_index.hist()
[Figure: histogram of data.Hb_index — slopes_analysis_notebook_5_1.png]
data.Index.hist()
[Figure: histogram of data.Index — slopes_analysis_notebook_6_1.png]
consult(data=data,
        beaches=['segunda', 'laredo', 'forta'],
        day='2017-01-13',
        columns=['H_break', 'Hb_index', 'Index'])
H_break Hb_index Index beach
2017-01-13 03:00:00 3.857180 0.3 3.376 segunda
2017-01-13 08:00:00 3.926694 0.3 2.532 segunda
2017-01-13 13:00:00 3.804837 0.3 3.376 segunda
2017-01-13 18:00:00 4.560021 0.0 1.000 segunda
2017-01-13 23:00:00 3.389903 0.5 5.088 segunda
2017-01-13 03:00:00 3.747338 0.3 2.664 laredo
2017-01-13 08:00:00 3.819149 0.3 2.664 laredo
2017-01-13 13:00:00 3.717768 0.5 4.384 laredo
2017-01-13 18:00:00 4.273107 0.3 3.776 laredo
2017-01-13 23:00:00 3.435028 0.5 4.608 laredo
2017-01-13 03:00:00 1.646689 0.7 4.128 forta
2017-01-13 08:00:00 1.679265 0.7 5.216 forta
2017-01-13 13:00:00 1.637391 0.7 7.568 forta
2017-01-13 18:00:00 2.068835 1.0 9.438 forta
2017-01-13 23:00:00 1.480459 0.7 8.250 forta
consult(data=data,
        beaches=['segunda', 'laredo', 'forta'],
        day='2019-12-22',
        columns=['H_break', 'Hb_index', 'Index'])
H_break Hb_index Index beach
2019-12-22 01:00:00 3.646619 0.5 4.320 segunda
2019-12-22 06:00:00 3.164886 0.5 4.860 segunda
2019-12-22 11:00:00 2.948261 0.8 6.264 segunda
2019-12-22 16:00:00 3.183460 0.5 4.320 segunda
2019-12-22 21:00:00 2.933703 0.8 5.856 segunda
2019-12-22 01:00:00 3.782744 0.3 3.664 laredo
2019-12-22 06:00:00 3.431986 0.5 5.382 laredo
2019-12-22 11:00:00 3.289403 0.5 5.058 laredo
2019-12-22 16:00:00 3.717900 0.5 4.496 laredo
2019-12-22 21:00:00 3.537950 0.5 4.976 laredo
2019-12-22 01:00:00 1.630818 0.7 4.212 forta
2019-12-22 06:00:00 1.441102 0.7 4.356 forta
2019-12-22 11:00:00 1.392555 0.7 4.212 forta
2019-12-22 16:00:00 1.624850 0.7 4.212 forta
2019-12-22 21:00:00 1.505351 0.7 4.356 forta
consult(data=data,
        beaches=['brusco', 'canallave', 'laredo'],
        day='2003-06-20',
        columns=['H_break', 'Hb_index', 'Index'])
H_break Hb_index Index beach
2003-06-20 02:00:00 2.131825 1.0 6.798 canallave
2003-06-20 07:00:00 1.979701 1.0 5.168 canallave
2003-06-20 12:00:00 2.064915 1.0 5.168 canallave
2003-06-20 17:00:00 2.103380 1.0 1.000 canallave
2003-06-20 22:00:00 1.854655 1.0 7.106 canallave
2003-06-20 02:00:00 1.792632 1.0 6.886 brusco
2003-06-20 07:00:00 1.678462 0.7 3.472 brusco
2003-06-20 12:00:00 1.858943 1.0 1.000 brusco
2003-06-20 17:00:00 1.997746 1.0 1.000 brusco
2003-06-20 22:00:00 1.703752 1.0 1.000 brusco
2003-06-20 02:00:00 1.234815 0.7 1.000 laredo
2003-06-20 07:00:00 1.291503 0.7 1.000 laredo
2003-06-20 12:00:00 1.678732 0.7 1.000 laredo
2003-06-20 17:00:00 1.842311 1.0 1.000 laredo
2003-06-20 22:00:00 1.498014 0.7 1.000 laredo
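`consult` lives in the local `slopes` module, so its code is not shown here; judging from the calls and outputs above, it selects the rows of one calendar day for the requested beaches and columns. An equivalent pandas selection, sketched as an assumption about the helper (the name `consult_sketch` is hypothetical):

def consult_sketch(data, beaches, day, columns):
    # partial string indexing on the DatetimeIndex slices the whole day
    subset = data.loc[day]
    # keep the requested beaches, then the requested columns plus the label
    subset = subset[subset['beach'].isin(beaches)]
    return subset[columns + ['beach']]

# e.g. consult_sketch(data, ['forta'], '2017-01-13', ['H_break', 'Index'])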
def plot_scatter(x, y):
    # subsample every 100th row to keep the interactive scatter responsive
    fig = px.scatter(data[::100], x=x, y=y, color='beach')
    fig.show()

interactive_plot = interactive(plot_scatter, 
                               x=data.columns,
                               y=data.columns)
interactive_plot
def plot_bar(grouper, variable):
    fig = px.bar(data.groupby([grouper, 'beach']).mean().reset_index(),
                 y=variable,
                 x=grouper,
                 color='beach',
                 barmode='group')
    fig.show()

interactive_plot = interactive(plot_bar, 
                               grouper=['Hour', 'Day_Moment', 'Month', 'Season', 'Year'],
                               variable=data.columns)
interactive_plot
def plot_box(period, y):
    data_box = data.copy()
    # months arrive as numpy integers; seasons and day moments as strings
    if isinstance(period, np.int64):
        fig = px.box(data_box.where(data_box['Month']==period).dropna(how='all', axis=0), 
                     x='beach', y=y, title='Month: '+str(period))
    elif period=='all':
        fig = px.box(data_box, x='beach', y=y, title='Points: '+period)
    elif period in ['Winter', 'Spring', 'Summer', 'Autumn']:
        fig = px.box(data_box.where(data_box['Season']==period).dropna(how='all', axis=0), 
                     x='beach', y=y, title='Season: '+period)
    else:
        fig = px.box(data_box.where(data_box['Day_Moment']==period).dropna(how='all', axis=0), 
                     x='beach', y=y, title='Day moment: '+period)
    fig.show()
    
period = ['all'] + list(data.Season.unique()) + list(data.Month.unique()) + list(data.Day_Moment.unique())

interactive_plot = interactive(plot_box,
                               period=period,
                               y=data.columns)
interactive_plot
def plot_hist(period, x):
    data_hist = data[::100].copy()
    if isinstance(period, np.int64):
        fig = px.histogram(data_hist.where(data_hist['Month']==period).dropna(how='all', axis=0), 
                           x=x, color='beach', marginal='box', 
                           hover_data=data.columns, title='Month: '+str(period))
    elif period=='all':
        fig = px.histogram(data_hist, 
                           x=x, color='beach', marginal='box', 
                           hover_data=data.columns, title='Points: '+period)
    elif period in ['Winter', 'Spring', 'Summer', 'Autumn']:
        fig = px.histogram(data_hist.where(data_hist['Season']==period).dropna(how='all', axis=0), 
                           x=x, color='beach', marginal='box', 
                           hover_data=data.columns, title='Season: '+period)
    else:
        fig = px.histogram(data_hist.where(data_hist['Day_Moment']==period).dropna(how='all', axis=0), 
                           x=x, color='beach', marginal='box', 
                           hover_data=data.columns, title='Day moment: '+period)
    fig.show()
    
period = ['all'] + list(data.Season.unique()) + list(data.Month.unique()) + list(data.Day_Moment.unique())

interactive_plot = interactive(plot_hist,
                               period=period,
                               x=data.columns)
interactive_plot
def plot_prob(beach):
    histcolor = ['blue', 'green', 'yellow', 'orange', 'red', 'purple', 'black']
    data_prob = data.where(data['beach']==beach).dropna(how='all', axis=0).copy()
    # count records per (day of year, RSI bin), then divide by the average
    # number of records per day so each day's bin probabilities sum to ~1
    data_prob = data_prob.groupby([data_prob.index.dayofyear, 
                                   pd.cut(data_prob['Index'],
                                          [0,1,3,5,7,8,9,10],
                                          right=True)])\
                .count().mean(axis=1) / (len(data_prob)/366)
    data_prob.name = 'Probability of RSI'
    fig = px.histogram(data_prob.reset_index(),
                       x='level_0', y='Probability of RSI',
                       color='Index',
                       color_discrete_map={key: value for (key, value) in zip(
                           data_prob.reset_index()['Index'].unique(),
                           histcolor)},
                       nbins=366, range_y=[0,1],
                       labels={'level_0': 'Day of year'},
                       title='Beach: ' + beach, width=900, height=400)
    fig.show()
    
interactive_plot = interactive(plot_prob,
                               beach=data.beach.unique())
interactive_plot
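The division by `len(data_prob)/366` turns each (day of year, RSI bin) count into an approximate probability: the seven bin probabilities of a given day should add up to roughly one, up to calendar unevenness. A quick sanity check under that reading ('laredo' stands in for any beach label present in the data):

probe = data.where(data['beach']=='laredo').dropna(how='all', axis=0)
counts = probe.groupby([probe.index.dayofyear,
                        pd.cut(probe['Index'], [0,1,3,5,7,8,9,10], right=True)]).size()
# sum the bin probabilities of each day of year; values should sit near 1
per_day = (counts / (len(probe)/366)).groupby(level=0).sum()
print(per_day.round(2).head())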
def plot_prob(beach, ini_year, end_year):
    histcolor = ['blue', 'green', 'yellow', 'orange', 'red', 'purple', 'black']
    data_prob = data.where(data['beach']==beach).dropna(how='all', axis=0).copy()
    # filter the beach subset (not the full frame) to the requested years
    data_prob = data_prob.where((data_prob['Year']>=ini_year) & (data_prob['Year']<=end_year))\
                .dropna(how='all', axis=0).copy()
    data_prob = data_prob.groupby([pd.Grouper(freq='M'), 
                                   pd.cut(data_prob['Index'],
                                          [0,1,3,5,7,8,9,10],
                                          right=True)])\
                .count().mean(axis=1) / (len(data_prob)/(12*(end_year-ini_year + 1)))
    data_prob.name = 'Probability of RSI'
    fig = px.histogram(data_prob.reset_index(),
                       x='level_0', y='Probability of RSI',
                       color='Index',
                       color_discrete_map={key: value for (key, value) in zip(
                           data_prob.reset_index()['Index'].unique(),
                           histcolor)},
                       nbins=12*int(end_year-ini_year + 1), range_y=[0,1],
                       labels={'level_0': 'Historical month'},
                       title='Beach: ' + beach, width=900, height=400)
    fig.show()
    
interactive_plot = interactive(plot_prob,
                               beach=data.beach.unique(),
                               ini_year=data.Year.unique(),
                               end_year=data.Year.unique())
interactive_plot
def plot_prob_grouper(beach, grouper):
    histcolor = ['blue', 'green', 'yellow', 'orange', 'red', 'purple', 'black']
    data_prob = data.where(data['beach']==beach).dropna(how='all', axis=0).copy()
    data_prob = data_prob.groupby([data_prob[grouper], 
                                   pd.cut(data_prob['Index'],
                                          [0,1,3,5,7,8,9,10],
                                          right=True)])\
                .count().mean(axis=1) / (len(data_prob)/len(data[grouper].unique()))
    data_prob.name = 'Probability of RSI'
    fig = px.histogram(data_prob.reset_index(),
                       x=grouper, y='Probability of RSI',
                       color='Index',
                       color_discrete_map={key: value for (key, value) in zip(
                           data_prob.reset_index()['Index'].unique(),
                           histcolor)},
                       range_y=[0,1], nbins=len(data[grouper].unique()),
                       labels={'level_0': grouper},
                       title='Beach: ' + beach, width=900, height=400)
    fig.show()
    
grouper = ['Month', 'Season', 'Year']
    
interactive_plot = interactive(plot_prob_grouper,
                               beach=data.beach.unique(),
                               grouper=grouper)
interactive_plot
def plot_rose(beach):
    data_prob = data.where(data['beach']==beach).dropna(how='all', axis=0).copy()
    data_prob = data_prob.groupby([pd.cut(data_prob['Dir'],
                                          [0,20,40,60,80,100,120,140,160,180,
                                           200,220,240,260,280,300,320,340,360],
                                          right=True), 
                                   pd.cut(data_prob['Tp'],
                                          [0,6,8,10,12,14,16,18,24],
                                          right=True)]).mean()
    # rename without inplace=True: the inplace form returns None and breaks the chain
    data_prob = data_prob['H_break'].rename('H_break_value').reset_index()
    data_prob = data_prob.astype({'Dir': 'str', 'Tp': 'str'}, copy=True)
    fig = px.bar_polar(data_prob, r='H_break_value', theta='Dir',
                       color='Tp', color_discrete_sequence=px.colors.sequential.Plasma_r)
    fig.show()
    
interactive_plot = interactive(plot_rose,
                               beach=data.beach.unique())
interactive_plot
slopes_som = Slopes_SOM(data)
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=80000, plot_results=True)
slopes_som.plot_results(som, data_mean, data_count, plot_beaches=True, plot_months=True)
The following data will be trained: 

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1008091 entries, 1979-02-01 02:00:00 to 2020-02-29 20:00:00
Data columns (total 27 columns): same schema as the data.info() output above
dtypes: float64(21), int64(3), object(3)
memory usage: 215.4+ MB
[Figures: SOM training results over all beaches — slopes_analysis_notebook_18_1.png to slopes_analysis_notebook_18_5.png]
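`Slopes_SOM` also belongs to the local `slopes` module, so its internals are not shown here; a 20x20 self-organising map with these hyperparameters would typically be trained with MiniSom roughly as below (a sketch under that assumption, not the module's actual code; `pip install minisom`):

from minisom import MiniSom

# numeric features only; subsampled so the sketch stays quick on ~1e6 rows
features = data.select_dtypes('number').to_numpy(dtype=float)[::10]
# standardise so no single variable dominates the SOM distances
features = (features - features.mean(axis=0)) / features.std(axis=0)

som = MiniSom(20, 20, features.shape[1], sigma=0.8, learning_rate=0.5)
som.random_weights_init(features)
som.train_random(features, 80000)   # same iteration budget as the call above

# best-matching unit of every record, the basis for per-node mean/count maps
bmus = np.array([som.winner(row) for row in features])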
slopes_som = Slopes_SOM(data, beach='farolillo')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: farolillo
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_19_1.png, slopes_analysis_notebook_19_2.png]
slopes_som = Slopes_SOM(data, beach='bederna')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: bederna
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_20_1.png, slopes_analysis_notebook_20_2.png]
slopes_som = Slopes_SOM(data, beach='oyambre')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: oyambre
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_21_1.png, slopes_analysis_notebook_21_2.png]
slopes_som = Slopes_SOM(data, beach='locos')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: locos
The sum of all probabilities is: 0.9999999999999999
[Figures: slopes_analysis_notebook_22_1.png, slopes_analysis_notebook_22_2.png]
slopes_som = Slopes_SOM(data, beach='valdearenas')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: valdearenas
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_23_1.png, slopes_analysis_notebook_23_2.png]
slopes_som = Slopes_SOM(data, beach='canallave')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: canallave
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_24_1.png, slopes_analysis_notebook_24_2.png]
slopes_som = Slopes_SOM(data, beach='madero')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: madero
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_25_1.png, slopes_analysis_notebook_25_2.png]
slopes_som = Slopes_SOM(data, beach='segunda')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: segunda
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_26_1.png, slopes_analysis_notebook_26_2.png]
slopes_som = Slopes_SOM(data, beach='primera')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: primera
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_27_1.png, slopes_analysis_notebook_27_2.png]
slopes_som = Slopes_SOM(data, beach='pueblo')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: pueblo
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_28_1.png, slopes_analysis_notebook_28_2.png]
slopes_som = Slopes_SOM(data, beach='curva')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='DirW_index', plot_months=True)
The following data will be trained in: curva
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_29_1.png, slopes_analysis_notebook_29_2.png]
slopes_som = Slopes_SOM(data, beach='brusco')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Dir_index', plot_months=True)
The following data will be trained in: brusco
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_30_1.png, slopes_analysis_notebook_30_2.png]
slopes_som = Slopes_SOM(data, beach='forta')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Spr_index', plot_months=True)
The following data will be trained in: forta
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_31_1.png, slopes_analysis_notebook_31_2.png]
slopes_som = Slopes_SOM(data, beach='laredo')
som, data_mean, data_count = slopes_som.train(som_shape=(20,20), sigma=0.8, learning_rate=0.5,
                                              num_iteration=50000, plot_results=False)
slopes_som.plot_results(som, data_mean, data_count, second_plot='Hb_index', plot_months=True)
The following data will be trained in: laredo
The sum of all probabilities is: 1.0
[Figures: slopes_analysis_notebook_32_1.png, slopes_analysis_notebook_32_2.png]