python_skills/example2_v3.py at main · bretonr/python_skills · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
## v3 (Jedi version)
## This version should be much better in terms of execution speed.
## Likely caveat is memory overflow due to the large size of the arrays!

## This example creates a Gaussian burst in a time series which is copied in
## different frequency channels and shifted according to the dispersion law.
## This produces a dynamic spectrum with a swepped up burst. Many of these
## dynamic spectra are generated with random pulse parameters in order to obtain
## a set of fake bursts. The data are generate at high resolution, and then
## downsampled to a lower resolution.


## Module import
## NOTE: Never import modules in the following way: from numpy import *
import numpy as np
import os
import matplotlib.pyplot as plt
from icecream import ic
ic.enable()

## Profiler
import yappi
RUN_YAPPI = True

## Setting random number generator
rng = np.random.default_rng(12345)


#------------------------------------------------------------------------------
## Define important functions

## NOTE: It is a good habit to gather functions at the top or in a separate file
## which is imported so that the actual script part is easier to follow

## NOTE: The following function examplifies how to implement a good docstring for a function.
def Delay(dm, frequencies):
    '''
    Calculates the delay at a given frequency according to the DM formula.

    Parameters
    ----------
    dm : float
        Dispersion measure.
    frequencies : float, ndarray
        Frequencies in GHz at which to calculate the delay.

    Returns
    -------
    t_delays : float, ndarray
        Time delay in ms at each frequency.
    '''
    t_delays = 4.15*dm*( (frequencies**-2) - (frequencies.max()**-2) )
    return t_delays

def Spectrum(flux0, freq0, alpha, freqs):
    flux = flux0*(freqs/freq0)**alpha
    return flux

def Calc_model(amp, t, t0, delays, duration):
    model = amp * np.exp( -0.5*(t-(t0+delays))**2 / duration**2 )
    return model

def Downsample(arr, fact0, fact1, fact2):
    new_shape = (arr.shape[0]//fact0, fact0, arr.shape[1]//fact1, fact1, arr.shape[2]//fact2, fact2)
    new_arr = arr.reshape(new_shape)
    new_arr = new_arr.sum(axis=(1,3,5))
    return new_arr


#------------------------------------------------------------------------------
## Parameters definition

## NOTE: Also a good habit to gather the variables that can be modified at the
## top. This is much better than hardcording values as you might forget to
## modify a value somewhere if a change takes place.

## Final data size
nchannels_final = 128
nsamp_final = 128
nchannels_downsamp = 4
nsamp_downsamp = 150

## Input parameters for the observing setup
bottom_freq = 1.200 # in GHz
bandwidth = 0.4     # in GHz
obs_length = 1000.  # in ms

## Other input parameters
DMmax = 300
burst_minwidth = 1.
burst_maxwidth = 50.
burst_minsnr = 5.
burst_maxsnr = 20.
avg_index = -1.4
std_index = 0.5
avg_chan_noise = 1.
std_chan_noise = 1.0

## Number of mock bursts
nbursts = 100

## Directory where to store file
data_dir = '.'

## Derived quantities
nchannels = nchannels_final * nchannels_downsamp
nsamp = nsamp_final * nsamp_downsamp
dt_sampling = obs_length / nsamp
chanwidth = bandwidth / nchannels
top_freq = bottom_freq + (nchannels-1)*chanwidth


#------------------------------------------------------------------------------
## Script section

## Profiler
if RUN_YAPPI:
    yappi.set_clock_type("cpu") # Use set_clock_type("wall") for wall time
    yappi.start()


## Initialising various arrays
## NOTE: the following commented are not so good as prone to not yield the right size
#freqs = np.arange(bottom_freq, bottom_freq+bandwidth, chanwidth)
#tstamp = np.arange(0., obs_length, dt_sampling)
freqs = np.arange(nchannels)*chanwidth + bottom_freq
tstamp = np.arange(nsamp)*dt_sampling


## Double checking that the array sizes are right
assert freqs.size == nchannels
assert tstamp.size == nsamp

noise_arr = np.zeros((nbursts, nchannels_final, nsamp_final))
model_arr = np.zeros((nbursts, nchannels_final, nsamp_final))
data_arr = np.zeros((nbursts, nchannels_final, nsamp_final))

## Calculate the time delay at each frequency for a DM of 1
t_at_freqs_DM1 = Delay(1., freqs)


## Temporary arrays to store parameters
noise_arr_tmp = np.zeros((nchannels, nsamp))
model_arr_tmp = np.zeros((nchannels, nsamp))
data_arr_tmp = np.zeros((nchannels, nsamp))

## Generate burst parameters
DMs = rng.uniform(0., DMmax, size=nbursts)
t_bursts = rng.uniform(0, obs_length, size=nbursts)
duration_bursts = rng.uniform(burst_minwidth, burst_maxwidth, size=nbursts)
amp_bursts = rng.uniform(burst_minwidth, burst_maxwidth, size=nbursts)
alphas = rng.normal(loc=avg_index, scale=std_index, size=nbursts)

## Calculate the time delay at each frequency for a DM of 1
t_at_freqs = t_at_freqs_DM1*DMs[:,None]
ic(t_at_freqs.shape)

## Calculate the flux amplitude at each frequency
flux_at_freqs = Spectrum(amp_bursts[:,None], bottom_freq, alphas[:,None], freqs)
ic(flux_at_freqs.shape)

## For each channel, draw a noise level from a lognormal distribution,
## then generate white noise according to this noise level.
noise_std_per_channel = rng.lognormal(mean=avg_chan_noise, sigma=std_chan_noise, size=(nbursts, nchannels))
ic(noise_std_per_channel.shape)
noise_arr_tmp = rng.normal(loc=0., scale=1.0, size=(nbursts, nchannels, nsamp))*noise_std_per_channel[:,:,None]
ic(noise_arr_tmp.shape)

## Generate the model
model_arr_tmp = Calc_model(flux_at_freqs[:,:,None], tstamp[None,None,:], t_bursts[:,None,None], t_at_freqs[:,:,None], duration_bursts[:,None,None])
ic(model_arr_tmp.shape)

## Combining the data
data_arr_tmp = noise_arr_tmp + model_arr_tmp

noise_arr = Downsample(noise_arr_tmp, 1, nchannels_downsamp, nsamp_downsamp)
model_arr = Downsample(model_arr_tmp, 1, nchannels_downsamp, nsamp_downsamp)
data_arr = Downsample(data_arr_tmp, 1, nchannels_downsamp, nsamp_downsamp)


## Profiler
if RUN_YAPPI:
    yappi.get_func_stats().print_all()
    yappi.get_thread_stats().print_all()


#------------------------------------------------------------------------------
## Saving the results
np.save(os.path.join(data_dir,'noise'), noise_arr)
np.save(os.path.join(data_dir,'model'), model_arr)
np.save(os.path.join(data_dir,'data'), data_arr)


#------------------------------------------------------------------------------
## Plotting the last burst so we can visualise

if 0:

    ## Plot cavas for the original data
    fig1, ax1 = plt.subplots(ncols=3, figsize=(12,4))
    ax1[0].imshow(noise_arr_tmp, origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax1[1].imshow(model_arr_tmp, origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax1[2].imshow(data_arr_tmp, origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax1[0].set_title('Noise Original')
    ax1[1].set_title('Model Original')
    ax1[2].set_title('Data Original')
    ax1[0].set_xlabel('Time (ms)')
    ax1[1].set_xlabel('Time (ms)')
    ax1[2].set_xlabel('Time (ms)')
    ax1[0].set_ylabel('Frequency (GHz)')
    ax1[1].set_ylabel('Frequency (GHz)')
    ax1[2].set_ylabel('Frequency (GHz)')
    fig1.subplots_adjust(left=0.06, right=0.98, wspace=0.3)

    ## Plot cavas for the downsampled data
    fig2, ax2 = plt.subplots(ncols=3, figsize=(12,4))
    ax2[0].imshow(noise_arr[-1], origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax2[1].imshow(model_arr[-1], origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax2[2].imshow(data_arr[-1], origin='lower', aspect='auto', extent=[0, obs_length, bottom_freq, top_freq])
    ax2[0].set_title('Noise Downsampled')
    ax2[1].set_title('Model Downsampled')
    ax2[2].set_title('Data Downsampled')
    ax2[0].set_xlabel('Time (ms)')
    ax2[1].set_xlabel('Time (ms)')
    ax2[2].set_xlabel('Time (ms)')
    ax2[0].set_ylabel('Frequency (GHz)')
    ax2[1].set_ylabel('Frequency (GHz)')
    ax2[2].set_ylabel('Frequency (GHz)')
    fig2.subplots_adjust(left=0.06, right=0.98, wspace=0.3)