aboutsummaryrefslogtreecommitdiffstats
path: root/Helpers/formation_helpers.py
blob: 5587c9ecb0f52c7872d7dff77837f0cf423cae2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
"""
Helpers for Formation Simulations.
"""

import numpy as np
import stats_helpers as sh
import pandas as pd


def return_sources(pid, dfc):
    """
    Construct List of Mass Sources for a given Particle ID.
    Can be used to build Merger Trees.

    Walks the collision list backwards in time; whenever a recorded collision
    involves a particle already known to be part of pid's merger tree, the
    partner particle is appended as a source.

    @todo: Accelerate. Rewrite in Fortran?

    @param pid: Particle ID - [Integer]
    @param dfc: Collision list - [Pandas Dataframe from Io_Helpers]
    @return sources: Source Particle IDs - [Numpy Array]
    """
    # Feature-detect the sorting API instead of parsing the version string.
    # The old check, int(pd.__version__.split('.')[1]) >= 17, inspects only
    # the MINOR version and therefore selects the long-removed
    # DataFrame.sort() path on pandas >= 1.0 (minor version reset to 0).
    if hasattr(dfc, 'sort_values'):
        dfc = dfc.sort_values(by='time', ascending=False)
    else:
        dfc = dfc.sort(columns="time", ascending=False)
    # NaN-padded output array (one slot per collision plus the seed pid);
    # a parallel set gives O(1) membership tests in the loop below.
    sources = np.zeros(len(dfc) + 1) * np.nan
    sources[0] = pid
    known = {int(pid)}
    for ii, row in dfc.reset_index(drop=True).iterrows():
        if int(row.pidi) in known:
            sources[ii + 1] = int(row.pidj)
            known.add(int(row.pidj))
        elif int(row.pidj) in known:
            sources[ii + 1] = int(row.pidi)
            known.add(int(row.pidi))
    # Drop unfilled slots, return integer IDs.
    sources = sources[~np.isnan(sources)]
    sources = np.asarray(sources, dtype=np.int64)
    return sources


def assign_wmf_raymond2004(a):
    """
    Step-function water mass fraction vs. semi-major axis.
    Cf. Sec. 2.2 and Fig. 2 in Raymond+ (2004).
    http://adsabs.harvard.edu/abs/2004Icar..168....1R

    @param: a - Semi Major Axis (Float)
    @return: wmf - Water Mass Fraction (Float)
    """
    # Step values are quoted in percent; convert to a fraction on return.
    if a >= 2.5:
        wmf_percent = 5.0
    elif a >= 2.0:
        wmf_percent = 0.1
    else:
        wmf_percent = 0.001
    return wmf_percent / 100.0


def assign_wmf_ronco2014(a):
    """
    Step-function water mass fraction vs. semi-major axis.
    Cf. Sec. 2 in Ronco+ (2014).
    http://adsabs.harvard.edu/abs/2014A&A...567A..54R

    @param: a - Semi Major Axis (Float)
    @return: wmf - Water Mass Fraction (Float)
    """
    # Step values are quoted in percent; convert to a fraction on return.
    if a < 2.0:
        wmf_percent = 0.001
    elif a < 2.5:
        wmf_percent = 0.1
    elif a < 2.7:
        wmf_percent = 5.0
    else:
        wmf_percent = 50.0
    return wmf_percent / 100.0


def compute_kde(df, evaluation_range, evaluation_range_step, variable, \
                cov_tight_factor=4.0, integrates_to=-1, noweights=False):
    """
    Compute Kernel Density Estimate for Formation Runs.

    Options:
    1. Mass-Weighted Semi-Major Axis Distribution
    2. Mass-Weighted Eccentricity Distribution
    3. Mass-Weighted Inclination Distribution
    4. Water Mass Fraction
    5. Mass Function.

    Picking a cov_tight_factor is a bit of a dark art. Too small and the
    KDE is way too smooth. Too large and the resulting KDE doesn't smooth over
    adjacent points at all. Usually, power of 2 in the range 2 to 12 are
    reasonable. Use common sense. (@todo: Add literature link.)

    The mass function dN/d(logM) integrates to the variable integrates_to.
    This defaults to -1, where it integrates to the number of particles.

    @param: df - Genga Coordinate DataFrame [Pandas DataFrame]
    @param: evaluation_range - Range over which to compute KDE [Np Float Array]
    @param: evaluation_range_step - Step size for Evaluation Range [Float]
    @param: variable: - Variable for which to compute KDE (a,e,i,m) [String]
    @param: cov_tight_factor - Tighten covariance by this factor [Float]
    @param: integrates_to - KDE integrates to this value (dN/dm = X) [Float]
    @return: kde_evaluated - KDE evaluated on evaluation_range [Np Float Array]
    """

    # Dispatch table mapping the requested variable onto its sample array.
    # WMF, Qs, and mass are fitted in log10 space.
    extractors = {
        'a': lambda: np.asarray(df.a),
        'e': lambda: np.asarray(df.e),
        'i': lambda: np.asarray(df.i),
        'wmf': lambda: np.log10(np.asarray(df.wmf_02)),
        'Qs': lambda: np.log10(np.asarray(df.Qs)),
        'm': lambda: np.log10(np.asarray(df.mass)),
    }
    if variable not in extractors:
        raise Exception("Invalid Variable Requested.")
    samples = extractors[variable]()

    # Orbital elements get mass weights (unless explicitly disabled);
    # everything else is weighted uniformly.
    mass_weighted = variable in ('a', 'e', 'i')
    if mass_weighted and not noweights:
        weights = np.asarray(df.mass / df.mass.max())
    else:
        weights = np.ones_like(df.mass)

    # Fit and evaluate the KDE; tightening the (inverse) covariance by
    # cov_tight_factor sharpens the kernel.
    samples_2d = np.atleast_2d(samples)
    cv = sh.Covariator(samples_2d, weights)
    inv_cov, norm_factor = cv(bw_method='scott')
    inv_cov *= cov_tight_factor
    kde = sh.gaussian_kde(samples_2d, weights, inv_cov, norm_factor)
    kde_evaluated = kde(evaluation_range)

    if mass_weighted:
        # Normalize to Total Mass (Int{dM/d{a,e,i}} = Mass)
        norm = np.sum(df.mass) / (np.sum(kde_evaluated) * evaluation_range_step)
    else:
        # Normalize to Total Number of Particles (Int{dN/dM} = N) using a
        # trapezoid-style integral over the evaluation grid.
        bin_widths = np.diff(evaluation_range)
        midpoint_values = (kde_evaluated[1:] + kde_evaluated[:-1]) / 2.0
        N_0 = np.sum(midpoint_values * bin_widths)
        if integrates_to > 0.0:
            norm = integrates_to / N_0
        else:
            norm = len(df) / N_0

    # Apply Normalization
    kde_evaluated *= norm

    # Return
    return kde_evaluated


def compute_wmf(dfo, dfo_t0, dfc, showstep=False):
    """
    Compute Water Mass Fraction for all Particles.
    Build Source List for Output, Compute WMF from Precursors.

    For each particle in dfo, collects its source particles from the
    collision history, looks up their initial orbits in dfo_t0, and
    mass-averages the per-source WMF prescriptions of Raymond+ (2004)
    and Ronco+ (2014).

    @param: dfo - Coordinate Output @ Time [Pandas Dataframe]
    @param: dfo_t0 - Output @ Initial Time [Pandas Dataframe]
    @param: dfc - Collision List [Pandas Dataframe]
    @param: showstep - Print progress roughly 8 times over the loop [Boolean]
    @return: dfo - Coordinate Output @ Time w/ WMF Fields [Pandas Dataframe]
    """

    # Per-particle accumulators, NaN until filled.
    wmf_01 = np.ones(len(dfo)) * np.nan
    wmf_02 = np.ones(len(dfo)) * np.nan

    a0_10 = np.ones(len(dfo)) * np.nan
    a0_50 = np.ones(len(dfo)) * np.nan
    a0_90 = np.ones(len(dfo)) * np.nan

    # Only collisions up to this snapshot's time can have contributed mass.
    dfc_now = dfc[dfc.time <= dfo.time.iloc[0]]

    # Progress interval. Clamp to >= 1: the original int(len(dfo)/8)
    # evaluated to 0 for fewer than 8 particles and crashed the modulo.
    step = max(int(len(dfo) / 8), 1)

    # The Murder Loop
    # @todo - Accelerate? Rewrite Source List Construction in Fortran?
    for ii, dfo_row in enumerate(dfo.iterrows()):
        # Info. Single-argument print() call is valid syntax in both
        # Python 2 and 3 (the original print statement was 2-only).
        if showstep and ii % step == 0:
            print("%i/%i" % (ii, len(dfo)))

        # Extract Series from One-Row Dataframe
        dfo_loc = dfo_row[1]

        # Identify Source Particles (restricted to this particle's run).
        sources = \
            return_sources(int(dfo_loc.pid), \
                           dfc_now[dfc_now.ifname == int(dfo_loc.ifname)])
        dfo_sources = \
            dfo_t0[(dfo_t0.pid.isin(sources)) & \
                   (dfo_t0.ifname == int(dfo_loc.ifname))].copy()

        # Compute WMF (Raymond+ 2004, Ronco+ 2014): mass-weighted average
        # over the initial water content of all source particles.
        dfo_sources.loc[:,'wmf_01'] = \
            pd.Series(dfo_sources.a.apply(assign_wmf_raymond2004), \
                      index=dfo_sources.index)
        dfo_sources.loc[:,'wmf_02'] = \
            pd.Series(dfo_sources.a.apply(assign_wmf_ronco2014), \
                      index=dfo_sources.index)
        wmf_01[ii] = np.sum(dfo_sources.mass * dfo_sources.wmf_01) / \
            np.sum(dfo_sources.mass)
        wmf_02[ii] = np.sum(dfo_sources.mass * dfo_sources.wmf_02) / \
            np.sum(dfo_sources.mass)

        # Stats for Source Distribution (initial semi-major axis spread).
        a0_10[ii] = np.percentile(dfo_sources.a, 10)
        a0_50[ii] = np.percentile(dfo_sources.a, 50)
        a0_90[ii] = np.percentile(dfo_sources.a, 90)

    # Append WMF
    dfo.loc[:,'wmf_01'] = wmf_01
    dfo.loc[:,'wmf_02'] = wmf_02

    # Append Stats for Source Distribution
    dfo.loc[:,'a0_10'] = a0_10
    dfo.loc[:,'a0_50'] = a0_50
    dfo.loc[:,'a0_90'] = a0_90

    # Return
    return dfo


def compute_wmf_wrapper(wrapper):
    """
    Wrapper for WMF Computation.
    Expects Arguments in List. Unfolds, and Calls WMF Computation.

    @param: wrapper - List of Coordinates, Collisions [Python List]
                      Cf. compute_wrapper() Doc
    @return: dfo - Coordinate Output @ Time w/ WMF Fields [Pandas Dataframe]
    """

    # Unpack the argument list and delegate.
    coordinates, coordinates_t0, collisions = \
        wrapper[0], wrapper[1], wrapper[2]
    return compute_wmf(coordinates, coordinates_t0, collisions)