# 17: MTC Expanded MNL Mode Choice

In [None]:
# TEST
import pandas as pd

import larch as lx

# Notebook-wide pandas display settings: show up to 999 columns, do not wrap
# wide frames across multiple lines, and print floats with 3 decimal places.
for option_name, option_value in (
    ("display.max_columns", 999),
    ("expand_frame_repr", False),
    ("display.precision", 3),
):
    pd.set_option(option_name, option_value)

For this example, we're going to re-create model 17 from the
[Self Instructing Manual](http://www.caee.utexas.edu/prof/Bhat/COURSES/LM_Draft_060131Final-060630.pdf). (pp. 128)

In [None]:
import larch as lx

# Display the version of Larch that is running this notebook.
lx.__version__

In [None]:
# Load the MTC example data provided by `lx.examples`.
d = lx.examples.MTC()
# Build a model on that data, explicitly selecting the numba compute engine.
m = lx.Model(d, compute_engine="numba")

We will use the usual choice and availability variables.

In [None]:
# Point the model at the availability ("avail") and choice ("chose") variables.
m.availability_ca_var = "avail"
m.choice_ca_var = "chose"

In [None]:
from larch import P, X

m.utility_ca = (
    # Total cost divided by household income.
    +X("totcost/hhinc") * P("costbyincome")
    # Total time gets separate parameters for motorized (altid <= 4) and
    # non-motorized (altid >= 5) alternatives; multiplying by the comparison
    # restricts tottime to that group of alternatives.
    + X("tottime * (altid <= 4)") * P("motorized_time")
    + X("tottime * (altid >= 5)") * P("nonmotorized_time")
    # ovtt divided by dist, applied to the motorized alternatives only.
    + X("ovtt/dist * (altid <= 4)") * P("motorized_ovtbydist")
)

The "totcost/hhinc" data is computed once as a new variable when loading the model data.
The same applies for tottime filtered by motorized modes (we harness the convenient fact
that all the motorized modes have identifying numbers 4 or less), and "ovtt/dist".

In [None]:
# Alternative-specific household income effects for alternatives 4, 5 and 6;
# no hhinc term is added to any other alternative here.
for altid in (4, 5, 6):
    m.utility_co[altid] += X("hhinc") * P(f"hhinc#{altid}")

Since the model we want to create groups together DA, SR2 and SR3+ jointly as
reference alternatives with respect to income, we can simply omit all of these alternatives
from the block that applies to **hhinc**.

For vehicles per worker, the preferred model includes a joint parameter on SR2 and SR3+,
but not including DA and not fixed at zero.  Here we might use a shadow_parameter (also
called an alias in some places), which allows
us to specify one or more parameters that are simply a fixed proportion of another parameter.
For example, we can say that vehbywrk_SR2 will be equal to vehbywrk_SR.
In the code below, we get the same result more directly by using the single parameter name
vehbywrk_SR in the utility functions of both SR2 and SR3+.

In [None]:
def _add_co_terms(alt, vehbywrk_pname):
    """Append the vehbywrk, CBD, employment-density and ASC terms for one alternative.

    ``alt`` is one element of ``d["alt_names"]``; its value supplies the name used
    in the parameter suffixes and its ``altid`` selects the utility function.
    ``vehbywrk_pname`` is the name of the parameter applied to ``vehbywrk``.
    """
    name = str(alt.values)
    m.utility_co[int(alt.altid)] += (
        +X("vehbywrk") * P(vehbywrk_pname)
        + X("wkccbd+wknccbd") * P("wkcbd_" + name)
        + X("wkempden") * P("wkempden_" + name)
        + P("ASC_" + name)
    )


# The alternatives at positions 1 and 2 share the single parameter vehbywrk_SR.
for alt in d["alt_names"][1:3]:
    _add_co_terms(alt, "vehbywrk_SR")

# Every later alternative gets its own vehbywrk parameter, named after itself.
for alt in d["alt_names"][3:]:
    _add_co_terms(alt, "vehbywrk_" + str(alt.values))

We didn't explicitly define our parameters first, which is fine; Larch will
find them in the utility functions (or elsewhere in more complex models).
But they may be found in a weird order that is hard to read in reports.
We can define an ordering scheme by assigning to the `ordering` attribute,
like this:

In [None]:
# Reporting groups: each tuple is a label followed by the regular expressions
# that are compared against the parameter names to fill that group.
m.ordering = (
    ("LOS", ".*cost.*", ".*time.*", ".*dist.*"),
    ("Zonal", "wkcbd.*", "wkempden.*"),
    ("Household", "hhinc.*", "vehbywrk.*"),
    ("ASCs", "ASC.*"),
)
# NOTE(review): presumably limits every parameter to the range [-25, 25] —
# confirm against the Larch `set_cap` documentation.
m.set_cap(25)

Each item in `ordering` is a tuple, with a label and one or more regular expressions,
which will be compared against
all the parameter names.  Any names that match will be pulled out and put into the
reporting order sequentially.  Thus if a parameter name would match more than one
regex, it will appear in the ordering only for the first match.


Having created this model, we can then estimate it:

In [None]:
# Estimate the model by maximizing the log likelihood.  stderr=True presumably
# also requests standard errors (confirm); the optimizer options allow up to
# 1000 iterations with an ftol of 1e-10.
result = m.maximize_loglike(
    stderr=True,
    options=dict(maxiter=1000, ftol=1e-10),
)

*Note we have set the convergence tolerance to be excessively strict here to ensure
that the optimization results remain steady across platforms with different default settings.*

In [None]:
# TEST
# Short alias for the estimation result; the following test cell reads `r` too.
r = result
from pytest import approx

# Regression checks on the estimation outcome.  Each assertion carries the
# observed value in its message, so a failure can be diagnosed from the
# notebook output alone (a bare assert would only report AssertionError).
assert r.loglike == approx(-3444.185105027836), f"loglike: {r.loglike}"
assert r.n_cases == 5029, f"n_cases: {r.n_cases}"
assert "success" in r.message.lower(), f"message: {r.message}"

In [None]:
# TEST
# Map each parameter name to its estimated value.
revealed_x = {pname: value for pname, value in zip(m.pnames, r.x)}

# Reference estimates that this example is expected to reproduce.
expected_x = {
    "ASC_Bike": -1.6262943624447215,
    "ASC_SR2": -1.8079290469266305,
    "ASC_SR3+": -3.4332103811742303,
    "ASC_Transit": -0.6885545098242597,
    "ASC_Walk": 0.0726870281895153,
    "costbyincome": -0.05237387934320275,
    "hhinc#4": -0.005305496592916417,
    "hhinc#5": -0.008655283203651252,
    "hhinc#6": -0.006001173483271529,
    "motorized_ovtbydist": -0.13278580524985106,
    "motorized_time": -0.02016680241721925,
    "nonmotorized_time": -0.045501011178961354,
    "vehbywrk_Bike": -0.7029295169096494,
    "vehbywrk_SR": -0.3164671378998241,
    "vehbywrk_Transit": -0.9453446457215215,
    "vehbywrk_Walk": -0.7216758952894061,
    "wkcbd_Bike": 0.49873916547630665,
    "wkcbd_SR2": 0.2592843053485899,
    "wkcbd_SR3+": 1.0671968496505624,
    "wkcbd_Transit": 1.3098935436679964,
    "wkcbd_Walk": 0.09596819068963038,
    "wkempden_Bike": 0.0019099447823146232,
    "wkempden_SR2": 0.0015784089844443042,
    "wkempden_SR3+": 0.002259029274882882,
    "wkempden_Transit": 0.0031323024172347728,
    "wkempden_Walk": 0.002898970912944004,
}

# Each estimate must agree with its reference value to within a 2% relative
# tolerance; on failure the message reports the parameter name and the
# estimated/expected ratio.
for pname, expected in expected_x.items():
    assert revealed_x[pname] == approx(expected, rel=2e-2), (
        f"{pname}, {revealed_x[pname] / expected}"
    )

In [None]:
# Show the parameter summary for the estimated model.
m.parameter_summary()