Examples
The following scripts help with the download and remote access to the PIKART catalog. All exemplary scripts are provided in Python and MATLAB.
List of examples
- PIKART_AR-Count.m
- PIKART_AR-Count.py
- PIKART_DataAccess_Eulerian.m
- PIKART_DataAccess_Eulerian_siphon.py
- PIKART_DataAccess_Eulerian_xarray.py
- PIKART_DataAccess_Lagrangian.m
- PIKART_DataAccess_Lagrangian.py
- PIKART_Download.m
- PIKART_Download.py
- PIKART_Download_xr.py
PIKART_AR-Count.m
Example MATLAB script to count the number of Atmospheric River (AR) conditions
% Example MATLAB script to count the number of Atmospheric River (AR) conditions
% from the Eulerian PIKART catalog using THREDDS access.
% Select the year of interest
year = 2020;
% Construct the OPeNDAP URL to access the NetCDF file for the selected year
url = sprintf('https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/PIKARTV1_eulerian_ERA5_0p5deg_6hr_%d.nc', year);
% Get information about the 'ar_mask' variable to determine its dimensions
info = ncinfo(url, 'ar_mask');
% Define the base time used in the NetCDF time variable (ERA5 reference)
t0 = datenum('1940-01-01 00:00:00');
% Get the size of the 'ar_mask' variable (dimensions: lon x lat x time)
size_ar = info.Size;
% Extract number of time steps from the third dimension
timeframes = size_ar(3);
% Preallocate array to store time and AR count for each time frame
cnt = zeros(timeframes, 2);
% Create a progress bar for visual feedback
h = waitbar(0, 'Count ARs');
% Loop over each time frame to count the number of AR objects
for i = 1:timeframes
waitbar(i / timeframes) % Update the progress bar
% Define the 3D start index and count to read a single 2D AR mask snapshot
start = [1, 1, i];
count = [size_ar(1), size_ar(2), 1];
% Read the 2D AR mask for the current time frame
ar_snapshot = ncread(url, 'ar_mask', start, count);
% Read the corresponding time value
t = ncread(url, 'time', i, 1);
% Label connected AR regions in the snapshot using 4-neighborhood connectivity
[L, num] = bwlabel(ar_snapshot, 4);
% Store time and number of detected ARs
cnt(i, 1) = t;
cnt(i, 2) = num;
end
% Close the progress bar
close(h)
% Plot the number of ARs over time
plot(t0 + cnt(:,1)/24, cnt(:,2)) % Convert hours since t0 to MATLAB datenum format
datetick % Format x-axis as calendar dates
title(sprintf('Number of ARs in year %d', year)) % Add title with the year
PIKART_AR-Count.py
Example Python script to count the number of Atmospheric River (AR) conditions
# Example Python script to count the number of Atmospheric River (AR) conditions
# from the Eulerian PIKART catalog using THREDDS access.
# Import required libraries
from siphon.catalog import TDSCatalog # For accessing THREDDS catalogs
import matplotlib.pyplot as plt # For plotting
import numpy as np # For numerical operations
import pandas as pd # For tabular data
from scipy.ndimage import label # For connected component labeling
from tqdm import tqdm # For progress bar
import datetime # For time conversion
# Optional duplicate import (can be removed)
import matplotlib.pyplot as plt
def get_data(year):
"""
Access the remote NetCDF dataset for the given year from the PIKART catalog using THREDDS.
"""
# URL to the THREDDS catalog
catUrl = "https://ar.pik-potsdam.de/thredds/catalog/eulerian_era5/catalog.xml"
# Build the dataset name based on the year
datasetName = f"PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"
# Open the catalog and get dataset object
catalog = TDSCatalog(catUrl)
ds = catalog.datasets[datasetName]
# Use remote access to get the dataset object (netCDF-like access)
dataset = ds.remote_access()
return dataset
def count_ARs(year):
"""
Count Atmospheric River (AR) conditions per time step for a given year.
"""
# Get dataset handle from THREDDS server
ds = get_data(year)
# Access the AR mask variable (3D: time x lat x lon)
ar_mask = ds.variables['ar_mask']
# Get time variable (in hours since 1940-01-01 00:00:00)
time = ds.variables['time'][:]
# Define connectivity structure (3x3) for 8-neighbor labeling in 2D
structure = np.ones((3, 3), dtype=int)
# Prepare array to store AR counts for each timestep
count = np.zeros(ar_mask.shape[0], dtype=int)
# Iterate over time steps and count connected AR regions using labeling
for i in tqdm(range(ar_mask.shape[0]), desc="Processing timesteps"):
labeled_array, num_features = label(ar_mask[i, :, :], structure=structure)
count[i] = num_features
# Create dataframe with time and number of ARs
df_count = pd.DataFrame({
'Time': time,
'AR_Count': count
})
return df_count
# Set year of interest
year = 2020
# Create empty dataframe for possible multi-year concatenation
cnt_all = pd.DataFrame()
# Count ARs for selected year
df = count_ARs(year=year)
# Optional: add year column to the dataframe
df["Year"] = year
# Concatenate into result dataframe
cnt_all = pd.concat([cnt_all, df], ignore_index=True)
# Base time for time conversion (1940-01-01 00:00:00)
base_time = datetime.datetime(1940, 1, 1)
# Convert hours-since-1940 to datetime
cnt_all['Datetime'] = cnt_all['Time'].apply(lambda h: base_time + datetime.timedelta(hours=float(h)))
# Plotting the number of ARs over time
plt.figure(figsize=(12, 4))
plt.plot(cnt_all['Datetime'], cnt_all['AR_Count'])
plt.xlabel('Time')
plt.ylabel('Number of ARs')
plt.title('AR Counts Over Time')
plt.grid(True)
plt.tight_layout()
plt.show()
PIKART_DataAccess_Eulerian.m
Example MATLAB script to remotely access PIKART Eulerian data via OPeNDAP
% Example MATLAB script to remotely access PIKART Eulerian data via OPeNDAP
% Define the year of interest for which data will be accessed
year = '2022';
% Base URL of the THREDDS data server (OPeNDAP endpoint)
% Dataset follows the naming pattern: PIKARTV1_eulerian_ERA5_0p5deg_6hr_<year>.nc
baseUrl = 'https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/';
datasetName = ['PIKARTV1_eulerian_ERA5_0p5deg_6hr_', year, '.nc'];
% Construct the full OPeNDAP URL for remote access
url = [baseUrl, datasetName];
% Get information about the variables in the dataset (optional, for inspection)
info = ncinfo(url);
disp('Variables in the dataset:');
for k = 1:length(info.Variables)
disp(info.Variables(k).Name)
end
% Get metadata specifically for the 'ivt' variable to determine dimensions
info = ncinfo(url, 'ivt');
dimNames = {info.Dimensions.Name}; % Names of dimensions (e.g., 'time', 'lat', 'lon')
dimLengths = [info.Dimensions.Length]; % Lengths of each dimension
% Find the index of the 'time' dimension
timeDimIdx = find(strcmp(dimNames, 'time'));
timeLength = dimLengths(timeDimIdx); % Total number of time steps
% Define read window to access only the last time slice
% Assumes dimensions are ordered as: time x lat x lon
% We read only the last time step (along 3rd dimension if needed)
start = [1, 1, timeLength]; % Start at first lat/lon, last time index
count = [dimLengths(1), dimLengths(2), 1]; % Read full lat/lon, 1 time slice
% Read the IVT data slice
ivt = ncread(url, 'ivt', start, count);
% Mask non-positive values (<= 0) by replacing them with NaN
ivt(ivt <= 0) = NaN;
% Plot the log10 of the IVT snapshot to enhance visual contrast
figure;
imagesc(log10(ivt')); % Transpose for correct lat-lon orientation
colorbar;
title(['Log_{10} of IVT at last time step in ', year]);
xlabel('Longitude');
ylabel('Latitude');
set(gca, 'YDir', 'normal'); % Ensure Y-axis (latitude) increases upward
PIKART_DataAccess_Eulerian_siphon.py
Example Python script to remotely access PIKART Eulerian data using siphon
# Example Python script to remotely access PIKART Eulerian data using siphon
# Import required libraries
from siphon.catalog import TDSCatalog # For accessing the THREDDS catalog
import matplotlib.pyplot as plt # For plotting
import numpy as np # For numerical array operations
# URL of dataset
# Define the base URL of the THREDDS Data Server catalog
catUrl = "https://ar.pik-potsdam.de/thredds/catalog/eulerian_era5/catalog.xml"
# Select the year of interest for which data will be accessed
year = '2022'
# Access a dataset
# Construct the specific dataset filename using the selected year
datasetName = f"PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"
# Load the catalog using Siphon
catalog = TDSCatalog(catUrl)
# Access the specific dataset object from the catalog
ds = catalog.datasets[datasetName]
# Print or inspect the name (optional)
ds.name
#%% Remote access to dataset
# Use remote_access() to open the dataset via OPeNDAP
# This allows you to read data directly without downloading the full file
dataset = ds.remote_access()
# Show all variables available in the dataset
dataset.variables
# Extract the IVT (Integrated Vapor Transport) variable from the dataset
ivt = dataset.variables['ivt']
# Display the shape of the IVT array (e.g., time, lat, lon)
ivt.shape
# Show IVT snapshot of the last day in dataset
# Extract the last time slice of IVT and mask all non-positive values as NaN
ivt_snapshot = np.where(ivt[-1, :, :] > 0, ivt[-1, :, :], np.nan)
# Plot the log10 of the IVT snapshot to enhance contrast
plt.imshow(np.log10(ivt_snapshot))
# Display the plot
plt.show()
PIKART_DataAccess_Eulerian_xarray.py
Example Python script to remotely access PIKART Eulerian data using xarray
# Example Python script to remotely access PIKART Eulerian data using xarray
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
# Define the year of interest
year = '2022'
# Construct the OPeNDAP URL for the dataset
opendap_url = f"https://ar.pik-potsdam.de/thredds/dodsC/eulerian_era5/PIKARTV1_eulerian_ERA5_0p5deg_6hr_{year}.nc"
# Open the dataset directly with xarray
ds = xr.open_dataset(opendap_url)
# Show all variables available in the dataset
print(ds.data_vars)
# Extract the IVT (Integrated Vapor Transport) variable from the dataset
ivt = ds['ivt']
# Display the shape of the IVT array
print(ivt.shape)
# Show IVT snapshot of the last day in dataset
ivt_last = ivt.isel(time=-1) # Select the last time slice
# Mask all non-positive values as NaN
ivt_snapshot = ivt_last.where(ivt_last > 0, np.nan)
# Plot the log10 of the IVT snapshot to enhance contrast
plt.imshow(np.log10(ivt_snapshot), origin='lower')
plt.title('Log10 IVT Snapshot (Last Day)')
plt.colorbar(label='log10(IVT)')
plt.show()
PIKART_DataAccess_Lagrangian.m
Example MATLAB script to remotely access PIKART Lagrangian data via OPeNDAP
% Example MATLAB script to remotely access PIKART Lagrangian data via OPeNDAP
% Define the year of interest for data extraction
year = '2022';
% Base OPeNDAP endpoint for lagrangian dataset
baseUrl = 'https://ar.pik-potsdam.de/thredds/dodsC/lagrangian_era5/';
datasetName = ['PIKARTV1_lagrangian_ERA5_0p5deg_6hr_', year, '.nc'];
% Construct the full OPeNDAP URL
url = [baseUrl, datasetName];
% List all available variables in the dataset (optional inspection step)
info = ncinfo(url);
disp('Variables in the dataset:');
for k = 1:length(info.Variables)
disp(info.Variables(k).Name)
end
% Choose a specific AR track ID and time index to visualize
trackid = 3;
timeidx = 1;
% Inspect the 'contour' variable to confirm dimension names and sizes
info = ncinfo(url, 'contour');
dimNames = {info.Dimensions.Name}; % Get dimension names (e.g., 'points', 'coord', 'track', 'time')
dimLengths = [info.Dimensions.Length]; % Get dimension lengths
% Show AR snapshot of the first day in dataset using axis and contour
% Read the 'axis' and 'contour' variables for a specific track and time
% Output is squeezed to 2D: [points x coord], then cast to double
axis = double(squeeze(ncread(url, 'axis', [1, 1, trackid, timeidx], [Inf, 2, 1, 1])));
contour = double(squeeze(ncread(url, 'contour', [1, 1, trackid, timeidx], [Inf, 2, 1, 1])));
% Read model time and convert it from hours since 1940-01-01 to a datetime string
time_val = ncread(url, 'time', timeidx, 1);
base_time = datetime(1940, 1, 1, 0, 0, 0);
actual_time = base_time + hours(time_val);
time_str = datestr(actual_time, 'yyyy-mm-dd HH:MM UTC');
% Visualize the AR track and contour on a world map
clf
worldmap('world') % Set up a global map projection
land = shaperead('landareas', 'UseGeoCoords', true); % Load land polygons
geoshow(land, 'FaceColor', [0.8 0.8 0.8]) % Display continents in light gray
linem(axis(:,1), axis(:,2), 'LineWidth', 2) % Plot AR axis (track line)
linem(contour(:,1), contour(:,2), 'LineWidth', 1, 'Color', 'red') % Plot AR contour
title(['Atmospheric River Track and Contour - Track ', num2str(trackid), ', Time: ', time_str])
xlabel('Longitude')
ylabel('Latitude')
grid on
axis equal
% Show histogram of AR areas for a selected point in time
% Read AR area data for the selected time
info = ncinfo(url, 'area');
dimNames = {info.Dimensions.Name}; % Get dimension names for area
area = ncread(url, 'area', [1, timeidx], [Inf, 1]); % Read all areas for this timestep
area(area <= 0) = NaN; % Mask invalid or non-positive area values
% Plot a histogram of the AR areas at the selected time
clf
histogram(area, 10, 'FaceColor', [0.4 0.8 1], 'EdgeColor', 'k')
xlabel('AR Area (km^2)')
ylabel('Number of ARs')
title(['Histogram of AR Areas at Time: ', time_str])
grid on
PIKART_DataAccess_Lagrangian.py
Example Python script to remotely access PIKART Lagrangian data via OPeNDAP
# Example Python script to remotely access PIKART Lagrangian data via OPeNDAP
# Import required libraries
from siphon.catalog import TDSCatalog # For accessing the THREDDS catalog
import matplotlib.pyplot as plt # For plotting
import numpy as np # For numerical array operations
import cartopy.crs as ccrs # For cartographic projections
import cartopy.feature as cfeature # For adding geographical features
import datetime # For handling time conversion
# URL of dataset
# Define the base URL of the THREDDS Data Server catalog
catUrl = "https://ar.pik-potsdam.de/thredds/catalog/lagrangian_era5/catalog.xml"
# Select the year of interest for which data will be accessed
year = '2022'
# Construct the specific dataset filename using the selected year
datasetName = f"PIKARTV1_lagrangian_ERA5_0p5deg_6hr_{year}.nc"
# Load the catalog using Siphon
catalog = TDSCatalog(catUrl)
# Access the specific dataset object from the catalog
ds = catalog.datasets[datasetName]
# Optional: Inspect dataset name
ds.name
# Remote access to dataset
# Use remote_access() to open the dataset via OPeNDAP
# This allows you to read data directly without downloading the full file
dataset = ds.remote_access()
# Show all variables available in the dataset
print(dataset.variables.keys())
# Extract the AR contours variable from the dataset
contour = dataset.variables['contour']
# Display the shape of the contours array (e.g., trackid, time, lat/lon, coordinate)
print(contour.shape)
# Show AR snapshot of the first day in dataset using axis and contour
# Select an AR and time index to plot (customize as needed)
trackid = 2
timeidx = 0 # First timestep
# Extract axis and contour data for the selected track and time
axis = dataset.variables['axis'][timeidx, trackid]
contour = dataset.variables['contour'][timeidx, trackid]
time = dataset.variables['time'][timeidx]
# Convert model time to real-world time format
base_time = datetime.datetime(1940, 1, 1, 0, 0, 0) # Reference time for conversion
time_in_hours = float(dataset.variables['time'][timeidx])
actual_time = base_time + datetime.timedelta(hours=time_in_hours)
time_str = actual_time.strftime("%Y-%m-%d %H:%M UTC") # Human-readable time string
# Plot AR axis and contour on map
fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_global()
ax.coastlines()
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
# Plot axis (centerline of AR) and contour (outer shape of AR)
ax.plot(axis[1,:], axis[0,:], color='red', linewidth=2, label='AR Axis', transform=ccrs.PlateCarree())
ax.plot(contour[1,:], contour[0,:], color='blue', linewidth=1, label='AR Contour', transform=ccrs.PlateCarree())
# Add legend and title
ax.legend()
ax.set_title(f"Atmospheric River\nTrack: {trackid}, Time: {time_str}")
plt.show()
# Show histogram of AR areas for a selected point in time
# Extract area values of ARs at selected time
area = dataset.variables['area'][timeidx]
area_data = np.array(area)
# Clean data by removing NaN values
area_data = area_data[~np.isnan(area_data)]
# Plot histogram of AR areas
plt.figure(figsize=(8,5))
plt.hist(area_data, bins=10, color='skyblue', edgecolor='black')
plt.xlabel('AR Area (km²)')
plt.ylabel('Number of ARs')
plt.title(f'Histogram of AR Areas\nTime: {time_str}')
plt.grid(True)
plt.show()
PIKART_Download.m
Example MATLAB script to download PIKART NetCDF files
% Example MATLAB script to download PIKART NetCDF files
% Version of the catalog
catalog_version = 'lagrangian'; % select Lagrangian version of the catalog
%catalog_version = 'eulerian'; % select Eulerian version of the catalog
% Base URL for file downloads via the THREDDS HTTP FileServer
base_file_url = ['https://ar.pik-potsdam.de/thredds/fileServer/',catalog_version,'_era5/'];
% Select one or more years to download data for
years = 2022; % Example: a single year, or use
%years = [2020:2022]; % for multiple years
% Loop over each requested year
for i = 1:length(years)
year = years(i); % Get the current year from the list
% Construct the expected filename for this year
filename = sprintf('PIKARTV1_%s_ERA5_0p5deg_6hr_%d.nc', catalog_version, year);
% Build the full download URL by appending the filename to the base URL
file_url = [base_file_url, filename];
% Define the local file path to save the downloaded file (current working directory)
local_file = fullfile(pwd, filename);
% Attempt to download the file
try
fprintf('Downloading %s ...\n', file_url); % Display message before download
websave(local_file, file_url); % Download and save the file
fprintf('Saved to %s\n', local_file); % Confirm successful save
catch ME
% In case of error (e.g., file not found or network issue), print a message
fprintf('Failed to download %s: %s\n', filename, ME.message);
end
end
PIKART_Download.py
Example Python script to download PIKART NetCDF files via OPeNDAP
# Example Python script to download PIKART NetCDF files via OPeNDAP
# Import necessary libraries
from siphon.catalog import TDSCatalog # For accessing THREDDS data catalogs
import requests # For downloading data from HTTPServer
from tqdm import tqdm # For showing download progress bar
def PIKART_Data_Download(year, catalog_version='eulerian'):
"""
Function to download a single year's PIKART NetCDF file
from the THREDDS Data Server (TDS) using HTTPServer access.
"""
# URL of the PIKART catalog
catUrl = f"https://ar.pik-potsdam.de/thredds/catalog/{catalog_version}_era5/catalog.xml"
# Build dataset name using the selected year
datasetName = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"
# Open the catalog
catalog = TDSCatalog(catUrl)
# Access the dataset object from the catalog
ds = catalog.datasets[datasetName]
# Retrieve the direct download link (via HTTPServer protocol)
file_url = ds.access_urls['HTTPServer']
# Set the local filename for saving the dataset
save_path = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"
# Initiate the download with streaming mode
response = requests.get(file_url, stream=True)
# Get the total file size (for progress tracking)
total_size = int(response.headers.get('Content-Length', 0))
chunk_size = 8192 # Download in 8 KB chunks
# Open local file and begin writing in chunks, showing progress
with open(save_path, 'wb') as f, tqdm(
total=total_size,
unit='B', # Unit = Bytes
unit_scale=True, # Show in KB/MB/GB
unit_divisor=1024, # Use 1024-based units
desc=save_path, # Progress bar label
) as progress_bar:
for chunk in response.iter_content(chunk_size=chunk_size):
if chunk: # Only write non-empty chunks
f.write(chunk)
progress_bar.update(len(chunk)) # Update progress bar
print(f"Full dataset saved to: {save_path}")
# Example usage
# Download data for a single year (e.g., 2019)
PIKART_Data_Download(year=2019) # Eulerian version of the catalog
PIKART_Data_Download(year=2019, catalog_version='lagrangian') # Lagrangian version of the catalog
# Download Eulerian data for multiple years (e.g., 2015–2020)
for year in range(2015, 2021):
PIKART_Data_Download(year=year)
PIKART_Download_xr.py
Example Python script to download PIKART NetCDF files via HTTP
# Example Python script to download PIKART NetCDF files via HTTP
import requests
from tqdm import tqdm
def pikart_data_download(year, catalog_version='eulerian'):
"""
Download a single year's PIKART NetCDF file
from the THREDDS Data Server using HTTPServer access,
with a visible progress bar.
"""
# Construct the HTTPServer URL for the dataset
http_url = (
f"https://ar.pik-potsdam.de/thredds/fileServer/"
f"{catalog_version}_era5/PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"
)
save_path = f"PIKARTV1_{catalog_version}_ERA5_0p5deg_6hr_{year}.nc"
print(f"Downloading {http_url}")
response = requests.get(http_url, stream=True)
total_size = int(response.headers.get('content-length', 0))
chunk_size = 1024 * 1024 # 1 MB
with open(save_path, 'wb') as f, tqdm(
total=total_size,
unit='B',
unit_scale=True,
desc=save_path,
unit_divisor=1024
) as pbar:
for chunk in response.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)
pbar.update(len(chunk))
print(f"Full dataset saved to: {save_path}")
# Example usage:
# Download data for a single year (e.g., 2019)
pikart_data_download(year=2019) # Eulerian version
pikart_data_download(year=2019, catalog_version='lagrangian') # Lagrangian version
# Download Eulerian data for multiple years (e.g., 2015–2020)
for year in range(2015, 2021):
pikart_data_download(year=year)