In [1]:
%%capture

import warnings
warnings.filterwarnings('ignore')

import calitp_data_analysis.magics

import altair as alt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from IPython.display import HTML, Markdown, display
from new_transit_metrics_utils import (
    GCS_FILE_PATH,
    make_line,
    make_long,
    make_scatter,
    sum_by_group,
)
from scipy.stats import zscore

# NOTE(review): disabled renderer override left over from development; delete if it is no longer needed.
# alt.renderers.enable("html")
# max_rows=None switches off Altair's row cap on chart datasets, so larger frames can be charted.
alt.data_transformers.enable("default", max_rows=None)

# Chart dimensions (presumably pixels). Not referenced by any cell visible in this notebook — TODO confirm they are still needed.
WIDTH = 300
HEIGHT = 150

In [2]:
# Default RTPA used when the notebook is run as-is. The "# Parameters" cell that
# follows reassigns `rtpa` (presumably injected by papermill — confirm), so this
# value only takes effect when no parameter is supplied.
rtpa = "Sacramento Area Council of Governments"

In [3]:
# Parameters
rtpa = "San Luis Obispo Council of Governments"


In [4]:
%%capture_parameters
# NOTE(review): cell magic from calitp_data_analysis.magics; it appears to read the
# parameter name(s) from the final line of the cell, so keep the bare name last — confirm.
rtpa

# San Luis Obispo Council of Governments
## New Transit Performance Metrics

The UCLA Institute of Transportation Studies (UCLA ITS) suggests that:
>Updating the policy and legislation that governs state transit funding could help make expenditures more effective and better aligned with the state's goals of VMT and GHG reduction, which transit can achieve only through increased ridership.

The UCLA ITS recommends using cost-efficiency metrics (operating expense per VRM/VRH/UPT) and service-effectiveness metrics (passengers per VRM/VRH) to compare transit-oriented vs. auto-oriented markets.

The charts below display these metrics by different categories.

In [5]:
# Load the raw metrics table. The `filters` argument keeps only rows whose
# RTPA equals the selected `rtpa` and whose mode is not "Vanpool".
df = pd.read_parquet(
    path=f"{GCS_FILE_PATH}raw_transit_performance_metrics_data.parquet",
    filters=[("RTPA", "==", rtpa), ("mode", "!=", "Vanpool")],
)

In [6]:
# Grouping keys for each aggregation level; every grouping carries the RTPA.
agency_cols = [
    "ntd_id",
    "agency_name",
    "RTPA",
]
mode_cols = [
    "mode",
    "RTPA",
]
tos_cols = [
    "service",
    "RTPA",
]

# Ratio metrics that get reshaped to long form for the line charts.
val_cols = ["opex_per_vrh", "opex_per_vrm", "upt_per_vrh", "upt_per_vrm", "opex_per_upt"]

# Short column / metric code -> human-readable label shown on charts.
rename_cols = dict(
    upt="Unlinked Passenger Trips",
    vrm="Vehicle Revenue Miles",
    vrh="Vehicle Revenue Hours",
    opexp_total="Operating Expense Total",
    opex_per_vrh="Operating Expense per Vehicle Revenue Hours",
    opex_per_vrm="Operating Expense per Vehicle Revenue Miles",
    opex_per_upt="Operating Expense per Unlinked Passenger Trips",
    upt_per_vrh="Unlinked Passenger Trips per Vehicle Revenue Hours",
    upt_per_vrm="Unlinked Passenger Trips per Vehicle Revenue Miles",
)

In [7]:
# Aggregate the filtered data once per grouping (agency, mode, type of service),
# in that order.
by_agency, by_mode, by_tos = (
    sum_by_group(df, group) for group in (agency_cols, mode_cols, tos_cols)
)

In [8]:
# Reshape each aggregate to long form, then swap values matching the short
# metric codes for their display labels.
by_agency_long, by_mode_long, by_tos_long = (
    make_long(df=wide, group_cols=group, value_cols=val_cols).replace(rename_cols)
    for wide, group in (
        (by_agency, agency_cols),
        (by_mode, mode_cols),
        (by_tos, tos_cols),
    )
)

# Copy of the filtered data with display-friendly column names, used by the scatter charts.
df_rename = df.rename(columns=rename_cols)

## Performance Metrics Explained

| Metric type          | Metric example                  | Implicit Goal(s)                       | Advantages                                   | Limitations                                  |
|----------------------|---------------------------------|---------------------------------------|----------------------------------------------|----------------------------------------------|
| Cost-efficiency     | Operating cost per revenue hour (opex_per_vrh) | Reduce costs                         | Useful in both financial and service planning | Favors high labor productivity in dense, congested areas; does not track use |
|                      | Operating cost per revenue mile (opex_per_vrm) |                                       |                                              |                                              |
|                      | Operating cost per vehicle trip (opex_per_upt) |                                       |                                              |                                              |
| Service-effectiveness| Passengers per revenue-vehicle hour (upt_per_vrh) | Increase ridership; reduce poorly patronized service | Useful for service planning; emphasizes what matters to riders | Favors high ridership; does not track costs   |
|                      | Passengers per revenue-vehicle mile (upt_per_vrm) | Increase ridership; reduce low-ridership route miles/segments | Useful for service planning                | Favors high ridership and fast vehicle speeds; does not track costs |


### By Agencies

In [9]:
# All agencies in the RTPA: metric values by year, faceted by metric
# ("variable") and colored by agency name.
make_line(
    by_agency_long,
    x_col="year",
    y_col="value",
    facet="variable:N",
    color="agency_name:N",
    # Capitalization fixed to match the "per Mode" and "per Type of Service" chart titles.
    chart_title="New Performance Metrics per Transit Agency",
    ind_axis=True,
)

### By Mode

In [10]:
# Metric values by year, faceted by metric ("variable") and colored by mode.
make_line(
    by_mode_long,
    chart_title="New Performance Metrics per Mode",
    x_col="year",
    y_col="value",
    color="mode:N",
    facet="variable:N",
    ind_axis=True,
)

### By Type of Service

In [11]:
# Metric values by year, faceted by metric ("variable") and colored by type of service.
make_line(
    by_tos_long,
    chart_title="New Performance Metrics per Type of Service",
    x_col="year",
    y_col="value",
    color="service:N",
    facet="variable:N",
    ind_axis=True,
)

## Cost-efficiency metrics
Cost-efficiency measures inputs to outputs: For example, the cost of operating an hour of transit service.

Per the UCLA ITS Paper
>[In] transit-oriented markets (which are predominantly urban), transit service tends to be relatively service-effective. But high operating costs on these (mostly) older, larger systems can inhibit efforts to improve ridership by adding service. In such contexts, assessing systems with an emphasis on **cost-efficiency (i.e., the cost of operating an hour of service)** grounds would provide incentives for agencies to **manage their costs** so as to be able to provide more service with available funding.

### Operating cost per VRH
Lower is better

In [12]:
# Rows with year "2023": total operating expense against vehicle revenue hours,
# colored by reporter type and split into one panel per mode.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Hours",
    y_ax="Operating Expense Total",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("mode", title="Operating Cost per VRH, by mode"),
)

0 rows with zero or negative values excluded due to log scale.


In [13]:
# Rows with year "2023": total operating expense against vehicle revenue hours,
# colored by reporter type and split into one panel per type of service.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Hours",
    y_ax="Operating Expense Total",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("service", title="Operating Cost per VRH, by Type of Service"),
)

0 rows with zero or negative values excluded due to log scale.




### Operating cost per VRM
Lower is better

In [14]:
# Rows with year "2023": total operating expense against vehicle revenue miles,
# colored by reporter type and split into one panel per mode.
make_scatter(
    data=df_rename[df_rename["year"] == "2023"],
    y_ax="Operating Expense Total",
    x_ax="Vehicle Revenue Miles",
    color="reporter_type",
    log_scale=True,
    lin_y_ax=True,
    chart_title="",
).facet(
    # Title previously read "per VRH" (copy-paste from the VRH charts); the x-axis here is revenue miles.
    facet=alt.Facet("mode", title="Operating Cost per VRM, by mode"),
    columns=2,
)

0 rows with zero or negative values excluded due to log scale.


In [15]:
# Rows with year "2023": total operating expense against vehicle revenue miles,
# colored by reporter type and split into one panel per type of service.
make_scatter(
    data=df_rename[df_rename["year"] == "2023"],
    y_ax="Operating Expense Total",
    x_ax="Vehicle Revenue Miles",
    color="reporter_type",
    log_scale=True,
    lin_y_ax=True,
    chart_title="",
).facet(
    # Title previously read "per VRH" (copy-paste from the VRH charts); the x-axis here is revenue miles.
    facet=alt.Facet("service", title="Operating Cost per VRM, by Type of Service"),
    columns=2,
)

0 rows with zero or negative values excluded due to log scale.


### Operating cost per trip
Lower is better

In [16]:
# Rows with year "2023": total operating expense against unlinked passenger trips,
# colored by reporter type and split into one panel per mode.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Unlinked Passenger Trips",
    y_ax="Operating Expense Total",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("mode", title="Operating Cost per UPT, by mode"),
)

0 rows with zero or negative values excluded due to log scale.


In [17]:
# Rows with year "2023": total operating expense against unlinked passenger trips,
# colored by reporter type and split into one panel per type of service.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Unlinked Passenger Trips",
    y_ax="Operating Expense Total",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("service", title="Operating Cost per UPT, by Type of Service"),
)

0 rows with zero or negative values excluded due to log scale.


## Service-effectiveness metrics
Service-effectiveness measures outputs to consumption: For example, passenger boardings per service hour.

Per the UCLA ITS Paper
>[In] more auto-oriented markets, transit operators tend to be relatively cost-efficient, in that they have lower operating costs but serve fewer riders. In this context, assessing systems with an emphasis on **service-effectiveness (i.e., passenger boardings per service hour)** will motivate operators to **improve ridership** by changing service hours, routes, and fares to better match local demand. Agencies might also implement fare programs with schools and other institutions, and even work with municipalities on improving land use around transit in order to increase the relative attractiveness of transit service.

### Passengers per VRH
Higher is better

In [18]:
# Rows with year "2023": unlinked passenger trips against vehicle revenue hours,
# colored by reporter type and split into one panel per mode.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Hours",
    y_ax="Unlinked Passenger Trips",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("mode", title="Passengers per VRH, by mode"),
)

0 rows with zero or negative values excluded due to log scale.


In [19]:
# Rows with year "2023": unlinked passenger trips against vehicle revenue hours,
# colored by reporter type and split into one panel per type of service.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Hours",
    y_ax="Unlinked Passenger Trips",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("service", title="Passengers per VRH, by Type of Service"),
)

0 rows with zero or negative values excluded due to log scale.


### Passengers per VRM
Higher is better

In [20]:
# Rows with year "2023": unlinked passenger trips against vehicle revenue miles,
# colored by reporter type and split into one panel per mode.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Miles",
    y_ax="Unlinked Passenger Trips",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("mode", title="Passengers per VRM, by mode"),
)

0 rows with zero or negative values excluded due to log scale.


In [21]:
# Rows with year "2023": unlinked passenger trips against vehicle revenue miles,
# colored by reporter type and split into one panel per type of service.
make_scatter(
    data=df_rename.loc[df_rename["year"] == "2023"],
    x_ax="Vehicle Revenue Miles",
    y_ax="Unlinked Passenger Trips",
    color="reporter_type",
    chart_title="",
    log_scale=True,
    lin_y_ax=True,
).facet(
    columns=2,
    facet=alt.Facet("service", title="Passengers per VRM, by Type of Service"),
)

0 rows with zero or negative values excluded due to log scale.
