The PIKART catalogue will be online soon.

Examples

The following scripts help with the download and remote access to the PIKART catalog. All exemplary scripts are provided in Python and MATLAB.

List of examples


PIKART_AR-Count.m

Example MATLAB script to count the number of Atmospheric River (AR) conditions

% Example MATLAB script to count the number of Atmospheric River (AR) conditions
% from the Eulerian PIKART catalog using THREDDS access.

% Select the year of interest
year = 2020;

% Construct the OPeNDAP URL to access the NetCDF file for the selected year
url = sprintf('https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/PIKARTV1_eulerian_ERA5_0p5deg_6hr_%d.nc', year);

% Get information about the 'ar_mask' variable to determine its dimensions
info = ncinfo(url, 'ar_mask');

% Define the base time used in the NetCDF time variable (ERA5 reference)
t0 = datenum('1940-01-01 00:00:00');

% Get the size of the 'ar_mask' variable (dimensions: lon x lat x time)
size_ar = info.Size;

% Extract number of time steps from the third dimension
timeframes = size_ar(3);

% Preallocate array to store time and AR count for each time frame
cnt = zeros(timeframes, 2);

% Create a progress bar for visual feedback
h = waitbar(0, 'Count ARs');

% Loop over each time frame to count the number of AR objects
for i = 1:timeframes
   waitbar(i / timeframes)  % Update the progress bar

   % Define the 3D start index and count to read a single 2D AR mask snapshot
   start  = [1, 1, i];
   count  = [size_ar(1), size_ar(2), 1];

   % Read the 2D AR mask for the current time frame
   ar_snapshot = ncread(url, 'ar_mask', start, count);

   % Read the corresponding time value
   t = ncread(url, 'time', i, 1);

   % Label connected AR regions in the snapshot using 4-neighborhood connectivity
   [L, num] = bwlabel(ar_snapshot, 4);

   % Store time and number of detected ARs
   cnt(i, 1) = t;
   cnt(i, 2) = num;
end

% Close the progress bar
close(h)

% Plot the number of ARs over time
plot(t0 + cnt(:,1)/24, cnt(:,2))  % Convert hours since t0 to MATLAB datenum format
datetick                          % Format x-axis as calendar dates
title(sprintf('Number of ARs in year %d', year))  % Add title with the year

PIKART_AR-Count.py

Example Python script to count the number of Atmospheric River (AR) conditions

# Example Python script to count the number of Atmospheric River (AR) conditions
# from the Eulerian PIKART catalog using THREDDS access.

# Import required libraries
from siphon.catalog import TDSCatalog             # For accessing THREDDS catalogs
import matplotlib.pyplot as plt                   # For plotting
import numpy as np                                # For numerical operations
import pandas as pd                               # For tabular data
from scipy.ndimage import label                   # For connected component labeling
from tqdm import tqdm                             # For progress bar
import datetime                                   # For time conversion

# Optional duplicate import (can be removed)
import matplotlib.pyplot as plt


def get_data(year):
    """
    Access the remote NetCDF dataset for the given year from the PIKART catalog using THREDDS.
    """
    # URL to the THREDDS catalog
    catUrl = "https://ar.pik-potsdam.de/thredds/catalog/eulerian_era5/catalog.xml"

    # Build the dataset name based on the year
    datasetName = f"PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"

    # Open the catalog and get dataset object
    catalog = TDSCatalog(catUrl)
    ds = catalog.datasets[datasetName]

    # Use remote access to get the dataset object (netCDF-like access)
    dataset = ds.remote_access()

    return dataset


def count_ARs(year):
    """
    Count Atmospheric River (AR) conditions per time step for a given year.
    """

    # Get dataset handle from THREDDS server
    ds = get_data(year)

    # Access the AR mask variable (3D: time x lat x lon)
    ar_mask = ds.variables['ar_mask']
    
    # Get time variable (in hours since 1940-01-01 00:00:00)
    time = ds.variables['time'][:]

    # Define connectivity structure (3x3) for 8-neighbor labeling in 2D
    structure = np.ones((3, 3), dtype=int)

    # Prepare array to store AR counts for each timestep
    count = np.zeros(ar_mask.shape[0], dtype=int)

    # Iterate over time steps and count connected AR regions using labeling
    for i in tqdm(range(ar_mask.shape[0]), desc="Processing timesteps"):
        labeled_array, num_features = label(ar_mask[i, :, :], structure=structure)
        count[i] = num_features

    # Create dataframe with time and number of ARs
    df_count = pd.DataFrame({
        'Time': time,
        'AR_Count': count
    })

    return df_count


# Set year of interest
year = 2020

# Create empty dataframe for possible multi-year concatenation
cnt_all = pd.DataFrame()

# Count ARs for selected year
df = count_ARs(year=year)

# Optional: add year column to the dataframe
df["Year"] = year

# Concatenate into result dataframe
cnt_all = pd.concat([cnt_all, df], ignore_index=True)

# Base time for time conversion (1940-01-01 00:00:00)
base_time = datetime.datetime(1940, 1, 1)

# Convert hours-since-1940 to datetime
cnt_all['Datetime'] = cnt_all['Time'].apply(lambda h: base_time + datetime.timedelta(hours=float(h)))

# Plotting the number of ARs over time
plt.figure(figsize=(12, 4))
plt.plot(cnt_all['Datetime'], cnt_all['AR_Count'])
plt.xlabel('Time')
plt.ylabel('Number of ARs')
plt.title('AR Counts Over Time')
plt.grid(True)
plt.tight_layout()
plt.show()

PIKART_DataAccess_Eulerian.m

Example MATLAB script to remotely access PIKART Eulerian data via OPeNDAP

% Example MATLAB script to remotely access PIKART Eulerian data via OPeNDAP

% Define the year of interest for which data will be accessed
year = '2022';

% Base URL of the THREDDS data server (OPeNDAP endpoint)
% Dataset follows the naming pattern: PIKARTV1_eulerian_ERA5_0p5deg_6hr_<year>.nc
baseUrl = 'https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/';
datasetName = ['PIKARTV1_eulerian_ERA5_0p5deg_6hr_', year, '.nc'];

% Construct the full OPeNDAP URL for remote access
url = [baseUrl, datasetName];

% Get information about the variables in the dataset (optional, for inspection)
info = ncinfo(url);
disp('Variables in the dataset:');
for k = 1:length(info.Variables)
    disp(info.Variables(k).Name)
end

% Get metadata specifically for the 'ivt' variable to determine dimensions
info = ncinfo(url, 'ivt');
dimNames = {info.Dimensions.Name};      % Names of dimensions (e.g., 'time', 'lat', 'lon')
dimLengths = [info.Dimensions.Length];  % Lengths of each dimension

% Find the index of the 'time' dimension 
timeDimIdx = find(strcmp(dimNames, 'time'));
timeLength = dimLengths(timeDimIdx);  % Total number of time steps

% Define read window to access only the last time slice
% Assumes dimensions are ordered as: time x lat x lon
% We read only the last time step (along 3rd dimension if needed)
start = [1, 1, timeLength];                 % Start at first lat/lon, last time index
count = [dimLengths(1), dimLengths(2), 1];  % Read full lat/lon, 1 time slice

% Read the IVT data slice
ivt = ncread(url, 'ivt', start, count);

% Mask non-positive values (<= 0) by replacing them with NaN
ivt(ivt <= 0) = NaN;

% Plot the log10 of the IVT snapshot to enhance visual contrast
figure;
imagesc(log10(ivt'));         % Transpose for correct lat-lon orientation
colorbar;
title(['Log_{10} of IVT at last time step in ', year]);
xlabel('Longitude');
ylabel('Latitude');
set(gca, 'YDir', 'normal');   % Ensure Y-axis (latitude) increases upward

PIKART_DataAccess_Eulerian_siphon.py

Example Python script to remotely access PIKART Eulerian data using siphon

# Example Python script to remotely access PIKART Eulerian data using siphon

# Import required libraries
from siphon.catalog import TDSCatalog            # For accessing the THREDDS catalog
import matplotlib.pyplot as plt                  # For plotting
import numpy as np                               # For numerical array operations

# URL of dataset

# Define the base URL of the THREDDS Data Server catalog
catUrl = "https://ar.pik-potsdam.de/thredds/catalog/eulerian_era5/catalog.xml"

# Select the year of interest for which data will be accessed
year = '2022'

# Access a dataset

# Construct the specific dataset filename using the selected year
datasetName = f"PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"

# Load the catalog using Siphon
catalog = TDSCatalog(catUrl)

# Access the specific dataset object from the catalog
ds = catalog.datasets[datasetName]

# Print or inspect the name (optional)
ds.name

#%% Remote access to dataset

# Use remote_access() to open the dataset via OPeNDAP
# This allows you to read data directly without downloading the full file
dataset = ds.remote_access()

# Show all variables available in the dataset
dataset.variables

# Extract the IVT (Integrated Vapor Transport) variable from the dataset
ivt = dataset.variables['ivt']

# Display the shape of the IVT array (e.g., time, lat, lon)
ivt.shape

# Show IVT snapshot of the last day in dataset

# Extract the last time slice of IVT and mask all non-positive values as NaN
ivt_snapshot = np.where(ivt[-1, :, :] > 0, ivt[-1, :, :], np.nan)

# Plot the log10 of the IVT snapshot to enhance contrast
plt.imshow(np.log10(ivt_snapshot))

# Display the plot
plt.show()

PIKART_DataAccess_Eulerian_xarray.py

Example Python script to remotely access PIKART Eulerian data using xarray

# Example Python script to remotely access PIKART Eulerian data using xarray

import xarray as xr
import matplotlib.pyplot as plt
import numpy as np

# Define the year of interest
year = '2022'

# Construct the OPeNDAP URL for the dataset
opendap_url = f"https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"

# Open the dataset directly with xarray
ds = xr.open_dataset(opendap_url)

# Show all variables available in the dataset
print(ds.data_vars)

# Extract the IVT (Integrated Vapor Transport) variable from the dataset
ivt = ds['ivt']

# Display the shape of the IVT array
print(ivt.shape)

# Show IVT snapshot of the last day in dataset
ivt_last = ivt.isel(time=-1)  # Select the last time slice

# Mask all non-positive values as NaN
ivt_snapshot = ivt_last.where(ivt_last > 0, np.nan)

# Plot the log10 of the IVT snapshot to enhance contrast
plt.imshow(np.log10(ivt_snapshot), origin='lower')
plt.title('Log10 IVT Snapshot (Last Day)')
plt.colorbar(label='log10(IVT)')
plt.show()

PIKART_DataAccess_Lagrangian.m

Example MATLAB script to remotely access PIKART Lagrangian data via OPeNDAP

% Example MATLAB script to remotely access PIKART Lagrangian data via OPeNDAP

% Define the year of interest for data extraction
year = '2022';

% Base OPeNDAP endpoint for lagrangian dataset
baseUrl = 'https://ar.pik-potsdam.de/thredds/dodsC/lagrangian_era5/';
datasetName = ['PIKARTV1_lagrangian_ERA5_0p5deg_6hr_', year, '.nc'];

% Construct the full OPeNDAP URL
url = [baseUrl, datasetName];

% List all available variables in the dataset (optional inspection step)
info = ncinfo(url);
disp('Variables in the dataset:');
for k = 1:length(info.Variables)
    disp(info.Variables(k).Name)
end

% Choose a specific AR track ID and time index to visualize
trackid = 3;
timeidx = 1;

% Inspect the 'contour' variable to confirm dimension names and sizes
info = ncinfo(url, 'contour');
dimNames = {info.Dimensions.Name};      % Get dimension names (e.g., 'points', 'coord', 'track', 'time')
dimLengths = [info.Dimensions.Length];  % Get dimension lengths


% Show AR snapshot of the first day in dataset using axis and contour
% Read the 'axis' and 'contour' variables for a specific track and time
% Output is squeezed to 2D: [points x coord], then cast to double
axis = double(squeeze(ncread(url, 'axis', [1, 1, trackid, timeidx], [Inf, 2, 1, 1])));
contour = double(squeeze(ncread(url, 'contour', [1, 1, trackid, timeidx], [Inf, 2, 1, 1])));

% Read model time and convert it from hours since 1940-01-01 to a datetime string
time_val = ncread(url, 'time', timeidx, 1);
base_time = datetime(1940, 1, 1, 0, 0, 0);
actual_time = base_time + hours(time_val);
time_str = datestr(actual_time, 'yyyy-mm-dd HH:MM UTC');

% Visualize the AR track and contour on a world map
clf
worldmap('world')  % Set up a global map projection
land = shaperead('landareas', 'UseGeoCoords', true);  % Load land polygons
geoshow(land, 'FaceColor', [0.8 0.8 0.8])  % Display continents in light gray

linem(axis(:,1), axis(:,2), 'LineWidth', 2)           % Plot AR axis (track line)
linem(contour(:,1), contour(:,2), 'LineWidth', 1, 'Color', 'red')  % Plot AR contour
title(['Atmospheric River Track and Contour - Track ', num2str(trackid), ', Time: ', time_str])
xlabel('Longitude')
ylabel('Latitude')
grid on
axis equal

% Show histogram of AR areas for a selected point in time
% Read AR area data for the selected time
info = ncinfo(url, 'area');
dimNames = {info.Dimensions.Name};      % Get dimension names for area
area = ncread(url, 'area', [1, timeidx], [Inf, 1]);  % Read all areas for this timestep
area(area <= 0) = NaN;  % Mask invalid or non-positive area values

% Plot a histogram of the AR areas at the selected time
clf
histogram(area, 10, 'FaceColor', [0.4 0.8 1], 'EdgeColor', 'k')
xlabel('AR Area (km^2)')
ylabel('Number of ARs')
title(['Histogram of AR Areas at Time: ', time_str])
grid on

PIKART_DataAccess_Lagrangian.py

Example Python script to remotely access PIKART Lagrangian data via OPeNDAP

# Example Python script to remotely access PIKART Lagrangian data via OPeNDAP

# Import required libraries
from siphon.catalog import TDSCatalog            # For accessing the THREDDS catalog
import matplotlib.pyplot as plt                  # For plotting
import numpy as np                               # For numerical array operations
import cartopy.crs as ccrs                       # For cartographic projections
import cartopy.feature as cfeature               # For adding geographical features
import datetime                                  # For handling time conversion

# URL of dataset
# Define the base URL of the THREDDS Data Server catalog
catUrl = "https://ar.pik-potsdam.de/thredds/catalog/lagrangian_era5/catalog.xml"

# Select the year of interest for which data will be accessed
year = '2022'

# Construct the specific dataset filename using the selected year
datasetName = f"PIKARTV1_lagrangian_ERA5_0p5deg_6hr_{year}.nc"

# Load the catalog using Siphon
catalog = TDSCatalog(catUrl)

# Access the specific dataset object from the catalog
ds = catalog.datasets[datasetName]

# Optional: Inspect dataset name
ds.name

# Remote access to dataset

# Use remote_access() to open the dataset via OPeNDAP
# This allows you to read data directly without downloading the full file
dataset = ds.remote_access()

# Show all variables available in the dataset
print(dataset.variables.keys())

# Extract the AR contours variable from the dataset
contour = dataset.variables['contour']

# Display the shape of the contours array (e.g., trackid, time, lat/lon, coordinate)
print(contour.shape)

# Show AR snapshot of the first day in dataset using axis and contour

# Select an AR and time index to plot (customize as needed)
trackid = 2
timeidx = 0  # First timestep

# Extract axis and contour data for the selected track and time
axis = dataset.variables['axis'][timeidx, trackid]
contour = dataset.variables['contour'][timeidx, trackid]
time = dataset.variables['time'][timeidx]

# Convert model time to real-world time format
base_time = datetime.datetime(1940, 1, 1, 0, 0, 0)  # Reference time for conversion
time_in_hours = float(dataset.variables['time'][timeidx])
actual_time = base_time + datetime.timedelta(hours=time_in_hours)
time_str = actual_time.strftime("%Y-%m-%d %H:%M UTC")  # Human-readable time string

# Plot AR axis and contour on map
fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_global()
ax.coastlines()
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.OCEAN, facecolor='lightblue')

# Plot axis (centerline of AR) and contour (outer shape of AR)
ax.plot(axis[1,:], axis[0,:], color='red', linewidth=2, label='AR Axis', transform=ccrs.PlateCarree())
ax.plot(contour[1,:], contour[0,:], color='blue', linewidth=1, label='AR Contour', transform=ccrs.PlateCarree())

# Add legend and title
ax.legend()
ax.set_title(f"Atmospheric River\nTrack: {trackid}, Time: {time_str}")

plt.show()

# Show histogram of AR areas for a selected point in time

# Extract area values of ARs at selected time
area = dataset.variables['area'][timeidx]
area_data = np.array(area)

# Clean data by removing NaN values
area_data = area_data[~np.isnan(area_data)]

# Plot histogram of AR areas
plt.figure(figsize=(8,5))
plt.hist(area_data, bins=10, color='skyblue', edgecolor='black')
plt.xlabel('AR Area (km²)')
plt.ylabel('Number of ARs')
plt.title(f'Histogram of AR Areas\nTime: {time_str}')
plt.grid(True)
plt.show()

PIKART_Download.m

Example MATLAB script to download PIKART NetCDF files

% Example MATLAB script to download PIKART NetCDF files

% Version of the catalog
catalog_version = 'lagrangian'; % select Lagrangian version of the catalog
%catalog_version = 'eulerian'; % select Eulerian version of the catalog

% Base URL for file downloads via the THREDDS HTTP FileServer
base_file_url = ['https://ar.pik-potsdam.de/thredds/fileServer/',catalog_version,'_era5/'];

% Select one or more years to download data for
years = 2022;  % Example: a single year, or use 
%years = [2020:2022]; % for multiple years

% Loop over each requested year
for i = 1:length(years)
    year = years(i);  % Get the current year from the list
    
    % Construct the expected filename for this year
    filename = sprintf('PIKARTV1_%s_ERA5_0p5deg_6hr_%d.nc', catalog_version, year);

    % Build the full download URL by appending the filename to the base URL
    file_url = [base_file_url, filename];

    % Define the local file path to save the downloaded file (current working directory)
    local_file = fullfile(pwd, filename);

    % Attempt to download the file
    try
        fprintf('Downloading %s ...\n', file_url);  % Display message before download
        websave(local_file, file_url);              % Download and save the file
        fprintf('Saved to %s\n', local_file);        % Confirm successful save
    catch ME
        % In case of error (e.g., file not found or network issue), print a message
        fprintf('Failed to download %s: %s\n', filename, ME.message);
    end
end

PIKART_Download.py

Example Python script to download PIKART NetCDF files via OPeNDAP

# Example Python script to download PIKART NetCDF files via OPeNDAP

# Import necessary libraries
from siphon.catalog import TDSCatalog          # For accessing THREDDS data catalogs
import requests                                # For downloading data from HTTPServer
from tqdm import tqdm                          # For showing download progress bar

def PIKART_Data_Download(year, catalog_version='eulerian'):
    """
    Function to download a single year's PIKART NetCDF file 
    from the THREDDS Data Server (TDS) using HTTPServer access.
    """

    # URL of the PIKART catalog
    catUrl = f"https://ar.pik-potsdam.de/thredds/catalog/{catalog_version}_era5/catalog.xml"

    # Build dataset name using the selected year
    datasetName = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"

    # Open the catalog
    catalog = TDSCatalog(catUrl)

    # Access the dataset object from the catalog
    ds = catalog.datasets[datasetName]

    # Retrieve the direct download link (via HTTPServer protocol)
    file_url = ds.access_urls['HTTPServer']

    # Set the local filename for saving the dataset
    save_path = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"

    # Initiate the download with streaming mode
    response = requests.get(file_url, stream=True)

    # Get the total file size (for progress tracking)
    total_size = int(response.headers.get('Content-Length', 0))
    chunk_size = 8192  # Download in 8 KB chunks

    # Open local file and begin writing in chunks, showing progress
    with open(save_path, 'wb') as f, tqdm(
        total=total_size,
        unit='B',               # Unit = Bytes
        unit_scale=True,        # Show in KB/MB/GB
        unit_divisor=1024,      # Use 1024-based units
        desc=save_path,         # Progress bar label
    ) as progress_bar:
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:  # Only write non-empty chunks
                f.write(chunk)
                progress_bar.update(len(chunk))  # Update progress bar

    print(f"Full dataset saved to: {save_path}")


# Example usage

# Download data for a single year (e.g., 2019)
PIKART_Data_Download(year=2019) # Eulerian version of the catalog
PIKART_Data_Download(year=2019, catalog_version='lagrangian') # Lagrangian version of the catalog

# Download Eulerian data for multiple years (e.g., 2015–2020)
for year in range(2015, 2021):
    PIKART_Data_Download(year=year)

PIKART_Download_xr.py

Example Python script to download PIKART NetCDF files via HTTP

# Example Python script to download PIKART NetCDF files via HTTP

import requests
from tqdm import tqdm

def pikart_data_download(year, catalog_version='eulerian'):
    """
    Download a single year's PIKART NetCDF file
    from the THREDDS Data Server using HTTPServer access,
    with a visible progress bar.
    """
    # Construct the HTTPServer URL for the dataset
    http_url = (
        f"https://ar.pik-potsdam.de/thredds/fileServer/"
        f"{catalog_version}_era5/PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"
    )
    save_path = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"

    print(f"Downloading {http_url}")
    response = requests.get(http_url, stream=True)
    total_size = int(response.headers.get('content-length', 0))
    chunk_size = 1024 * 1024  # 1 MB

    with open(save_path, 'wb') as f, tqdm(
        total=total_size,
        unit='B',
        unit_scale=True,
        desc=save_path,
        unit_divisor=1024
    ) as pbar:
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:
                f.write(chunk)
                pbar.update(len(chunk))
    print(f"Full dataset saved to: {save_path}")

# Example usage:

# Download data for a single year (e.g., 2019)
pikart_data_download(year=2019)  # Eulerian version
pikart_data_download(year=2019, catalog_version='lagrangian')  # Lagrangian version

# Download Eulerian data for multiple years (e.g., 2015–2020)
for year in range(2015, 2021):
    pikart_data_download(year=year)