import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load datasets
dataset1_path = "/groups/ocean/qifan/draft/combined_dataset185.csv"
dataset2_path = "/groups/ocean/qifan/draft/combined_dataset210.csv"
dataset3_path = "/groups/ocean/qifan/draft/combined_dataset220.csv"
dataset4_path = "/groups/ocean/qifan/draft/combined_dataset230i.csv"

dataset1 = pd.read_csv(dataset1_path)
dataset2 = pd.read_csv(dataset2_path)
dataset3 = pd.read_csv(dataset3_path)
dataset4 = pd.read_csv(dataset4_path)

# Remove the last row of each dataset
def remove_last_row(data):
    return data.iloc[:-1]

dataset1 = remove_last_row(dataset1)
dataset2 = remove_last_row(dataset2)
dataset3 = remove_last_row(dataset3)
dataset4 = remove_last_row(dataset4)

# Calculate and print summary statistics
def print_statistics(dataset, name):
    print(f"\nStatistics for {name}:")
    stats = dataset.describe()
    print(stats.loc[['min', 'max', 'mean', 'std']])

# Print statistics for each dataset
print_statistics(dataset1, 'Dataset 185')
print_statistics(dataset2, 'Dataset 210')
print_statistics(dataset3, 'Dataset 220')
print_statistics(dataset4, 'Dataset 230i')

# Combine datasets
combined_dataset = pd.concat([dataset1, dataset2, dataset3, dataset4]).reset_index(drop=True)

# Print statistics for the combined dataset
print_statistics(combined_dataset, 'Combined Dataset')

# Print the shape and first few lines of the combined dataset
print("\nCombined dataset shape:", combined_dataset.shape)
print("First few lines of the combined dataset:")
print(combined_dataset.head())

# Save the combined dataset to a CSV file
combined_dataset_path = "/groups/ocean/qifan/draft/CombinedDatasetLongUnsmooth.csv"
combined_dataset.to_csv(combined_dataset_path, index=False)
print(f"Combined dataset saved at: {combined_dataset_path}")

# Plot the time series for AMOC
plt.figure(figsize=(17, 8))
plt.plot(combined_dataset.index, combined_dataset['AMOC'], label='AMOC', color='skyblue')
plt.xlabel('Time Index')
plt.ylabel('AMOC')
plt.title('Combined Time Series of AMOC')
plt.legend()
plt.grid(True)
plt.tight_layout()
combined_plot_path = "/groups/ocean/qifan/draft/CombinedLongTimeSeriesAMOC.png"
plt.savefig(combined_plot_path)
plt.show()
print(f"Combined time series plot saved at: {combined_plot_path}")


import xarray as xr
import numpy as np
import pandas as pd

# Calculate the maximum AMOC at the equator between 500 and 2000 m depth.
# Note: moc_z is in centimetres, so 500 m = 50000 cm and 2000 m = 200000 cm.
def get_amoc_max_at_equator(file_path):
    try:
        print("Starting AMOC max calculation at the equator...")
        ds = xr.open_dataset(file_path, decode_times=False)
        lat_aux_grid = ds['lat_aux_grid']
        moc_z = ds['moc_z']

        # Select the equator (0 degrees latitude)
        lat_idx = np.abs(lat_aux_grid.values - 0).argmin()

        # Select the depth range (500 m to 2000 m); +1 so the end level is included
        depth_min_idx = np.abs(moc_z.values - 50000).argmin()
        depth_max_idx = np.abs(moc_z.values - 200000).argmin()
        depth_slice = slice(depth_min_idx, depth_max_idx + 1)

        # Extract the data within the selected depth range (transport_reg=1: Atlantic)
        amoc_data = ds['MOC'].isel(lat_aux_grid=lat_idx,
                                   moc_z=depth_slice,
                                   transport_reg=1, moc_comp=0)

        # Get the maximum AMOC value and the corresponding depth;
        # argmax is relative to the sliced range, so offset by depth_min_idx
        amoc_max = amoc_data.max(dim='moc_z', skipna=True)
        amoc_max_depth = moc_z.isel(moc_z=amoc_data.argmax(dim='moc_z') + depth_min_idx)

        print(f"Max AMOC value: {amoc_max.values} occurs at depth: {amoc_max_depth.values / 100} meters")
        return amoc_max.values, amoc_max_depth.values / 100  # Return depth in meters
    except Exception as e:
        print(f"Failed to calculate AMOC max at equator: {e}")
        return None, None
# Calculate the minimum MOC at 30S between 1000 and 4000 m depth
def get_moc_min_at_30s(file_path):
    try:
        print("Starting MOC min calculation at 30S...")
        ds = xr.open_dataset(file_path, decode_times=False)
        lat_aux_grid = ds['lat_aux_grid']
        moc_z = ds['moc_z']

        # Select 30S latitude
        lat_idx = np.abs(lat_aux_grid.values - (-30)).argmin()

        # Select the depth range (1000 m to 4000 m; moc_z is in centimetres);
        # +1 so the end level is included
        depth_min_idx = np.abs(moc_z.values - 100000).argmin()
        depth_max_idx = np.abs(moc_z.values - 400000).argmin()
        depth_slice = slice(depth_min_idx, depth_max_idx + 1)

        # Extract the data within the selected depth range (transport_reg=0: all ocean)
        moc_data = ds['MOC'].isel(lat_aux_grid=lat_idx,
                                  moc_z=depth_slice,
                                  transport_reg=0, moc_comp=0)

        # Get the minimum MOC value and the corresponding depth;
        # argmin is relative to the sliced range, so offset by depth_min_idx
        moc_min = moc_data.min(dim='moc_z', skipna=True)
        moc_min_depth = moc_z.isel(moc_z=moc_data.argmin(dim='moc_z') + depth_min_idx)

        print(f"Min MOC value: {moc_min.values} occurs at depth: {moc_min_depth.values / 100} meters")
        return moc_min.values, moc_min_depth.values / 100  # Return depth in meters
    except Exception as e:
        print(f"Failed to calculate MOC min at 30S: {e}")
        return None, None

# Paths to the .nc files
amoc_file_path = '/groups/ocean/qifan/annual_data/cesmi6gat31rblc220/cesmi6gat31rblc220_ocn_MOC_ANN.nc'
moc_file_path = '/groups/ocean/qifan/annual_data/cesmi6gat31rblc220/cesmi6gat31rblc220_ocn_MOC_ANN.nc'

# Get the max AMOC at the equator and the min MOC at 30S
amoc_max, amoc_max_depth = get_amoc_max_at_equator(amoc_file_path)
moc_min, moc_min_depth = get_moc_min_at_30s(moc_file_path)

# Load the existing CSV file
csv_file_path = '/groups/ocean/qifan/draft/combined_dataset220.csv'
df = pd.read_csv(csv_file_path)

# Update the DataFrame with the new values
df['AMOC'] = amoc_max
df['MOC_30SAABW'] = moc_min

# Drop the depth columns if they exist
if 'AMOC_Max_Depth' in df.columns:
    df = df.drop(columns=['AMOC_Max_Depth'])
if 'MOC_Min_Depth' in df.columns:
    df = df.drop(columns=['MOC_Min_Depth'])

# Save the updated DataFrame back to the CSV file
df.to_csv(csv_file_path, index=False)
print(f"Updated CSV file saved to {csv_file_path}")

# Print the depth values, which are not stored in the CSV
print(f"\nAMOC Max Depth: {amoc_max_depth} meters")
print(f"MOC Min Depth: {moc_min_depth} meters")

# Load and display the updated dataset information
print("\nLoading and displaying updated dataset information...\n")

print("Head of the dataset:")
print(df.head())

print("\nShape of the dataset:")
print(df.shape)

print("\nMinimum values in the dataset:")
print(df.min())

print("\nMaximum values in the dataset:")
print(df.max())

print("\nFirst few rows of the dataset:")
print(df.iloc[:5])


##### Fix TAUX #####
import xarray as xr
import numpy as np
import pandas as pd

# Extract the Southern Ocean TAUX time series (one spatial mean per time step)
def get_southern_ocean_taux(file_path, lat_range, lon_range):
    try:
        print("Starting Southern Ocean TAUX calculation...")
        ds = xr.open_dataset(file_path, decode_times=False)
        taux = ds['TAUX'].sel(lat=slice(*lat_range), lon=slice(*lon_range))
        # Average over latitude and longitude for each time step
        # (this returns an array with one value per time step)
        taux_avg = taux.mean(dim=['lat', 'lon'], skipna=True).values
        print("Southern Ocean TAUX time series calculated successfully.")
        return taux_avg
    except Exception as e:
        print(f"Failed to calculate Southern Ocean TAUX: {e}")
        return None
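# Optional: get_southern_ocean_taux uses an unweighted mean, which slightly
# over-weights high-latitude rows on a regular lat-lon grid. A minimal sketch of
# an area-weighted alternative using cos(latitude) weights follows; it assumes
# the same file layout (a 'TAUX' variable with 'lat'/'lon' coordinates) and is
# an illustration, not part of the original pipeline.
def get_southern_ocean_taux_weighted(file_path, lat_range, lon_range):
    ds = xr.open_dataset(file_path, decode_times=False)
    taux = ds['TAUX'].sel(lat=slice(*lat_range), lon=slice(*lon_range))
    weights = np.cos(np.deg2rad(taux['lat']))  # grid-cell area scales with cos(lat)
    return taux.weighted(weights).mean(dim=['lat', 'lon']).values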
# Path to the TAUX .nc file
taux_file_path = '/groups/ocean/qifan/annual_data/cesmi6gat31rblc230i/cesmi6gat31rblc230i_atm_TAUX_ANN.nc'

# Southern Ocean latitude and longitude ranges
southern_ocean_lat_range = (-70, -50)
southern_ocean_lon_range = (0, 360)

# Get the Southern Ocean TAUX time series
southern_ocean_taux_series = get_southern_ocean_taux(taux_file_path,
                                                     southern_ocean_lat_range,
                                                     southern_ocean_lon_range)

# Load the existing CSV file
csv_file_path = '/groups/ocean/qifan/draft/combined_dataset230i.csv'
df = pd.read_csv(csv_file_path)

# Check that the length of the TAUX series matches the DataFrame's length;
# only save when the column was actually updated
if southern_ocean_taux_series is not None and len(southern_ocean_taux_series) == len(df):
    # Update the DataFrame with the new Southern Ocean TAUX time series
    df['Southern_Ocean_TAUX'] = southern_ocean_taux_series
    # Save the updated DataFrame back to the CSV file
    df.to_csv(csv_file_path, index=False)
    print(f"Updated CSV file saved to {csv_file_path}")
else:
    print("Length mismatch between TAUX series and DataFrame. Please check the data alignment.")

# Load and display the updated dataset information
print("\nLoading and displaying updated dataset information...\n")

print("Head of the dataset:")
print(df.head())

print("\nShape of the dataset:")
print(df.shape)

print("\nMinimum values in the dataset:")
print(df.min())

print("\nMaximum values in the dataset:")
print(df.max())

print("\nFirst few rows of the dataset:")
print(df.iloc[:5])


# import pandas as pd
# # Load the existing CSV file
# csv_file_path = '/groups/ocean/qifan/draft/CombinedDatasetLongUnsmooth.csv'
# df = pd.read_csv(csv_file_path)
# # Rename the 'Southern_Ocean_TAUX' column to 'TAUX'
# df.rename(columns={'Southern_Ocean_TAUX': 'TAUX'}, inplace=True)
# # Save the updated DataFrame back to the CSV file
# df.to_csv(csv_file_path, index=False)
# # Display the first few rows to confirm the change
# print(df.head())
# Add AMOC 40N
import xarray as xr
import numpy as np
import pandas as pd

# Calculate the maximum AMOC at 40N between 500 and 2000 m depth
def get_amoc_max_at_40N(file_path):
    try:
        print("Starting AMOC max calculation at 40N...")
        ds = xr.open_dataset(file_path, decode_times=False)
        lat_aux_grid = ds['lat_aux_grid']
        moc_z = ds['moc_z']

        # Select 40N latitude
        lat_idx = np.abs(lat_aux_grid.values - 40).argmin()

        # Select the depth range (500 m to 2000 m; moc_z is in centimetres);
        # +1 so the end level is included
        depth_min_idx = np.abs(moc_z.values - 50000).argmin()
        depth_max_idx = np.abs(moc_z.values - 200000).argmin()
        depth_slice = slice(depth_min_idx, depth_max_idx + 1)

        # Extract the data within the selected depth range (transport_reg=1: Atlantic)
        amoc_data = ds['MOC'].isel(lat_aux_grid=lat_idx,
                                   moc_z=depth_slice,
                                   transport_reg=1, moc_comp=0)

        # Get the maximum AMOC value and the corresponding depth;
        # argmax is relative to the sliced range, so offset by depth_min_idx
        amoc_max = amoc_data.max(dim='moc_z', skipna=True)
        amoc_max_depth = moc_z.isel(moc_z=amoc_data.argmax(dim='moc_z') + depth_min_idx)

        print(f"Max AMOC value at 40N: {amoc_max.values} occurs at depth: {amoc_max_depth.values / 100} meters")
        return amoc_max.values, amoc_max_depth.values / 100  # Return depth in meters
    except Exception as e:
        print(f"Failed to calculate AMOC max at 40N: {e}")
        return None, None

# Path to the .nc file (run IDs: 185, 210, 220, 230i)
amoc_file_path = '/groups/ocean/qifan/annual_data/cesmi6gat31rblc230i/cesmi6gat31rblc230i_ocn_MOC_ANN.nc'

# Get the max AMOC at 40N
amoc_max_40N, amoc_max_depth_40N = get_amoc_max_at_40N(amoc_file_path)

# Load the existing CSV file
csv_file_path = '/groups/ocean/qifan/draft/combined_dataset230i.csv'
df = pd.read_csv(csv_file_path)

# Update the DataFrame with the new AMOC_40N values
df['AMOC_40N'] = amoc_max_40N

# Save the updated DataFrame back to the CSV file
df.to_csv(csv_file_path, index=False)
print(f"Updated CSV file saved to {csv_file_path}")

# Print the depth value, which is not stored in the CSV
print(f"\nAMOC Max Depth at 40N: {amoc_max_depth_40N} meters")

# Load and display the updated dataset information
print("\nLoading and displaying updated dataset information...\n")

print("Head of the dataset:")
print(df.head())

print("\nShape of the dataset:")
print(df.shape)

print("\nMinimum values in the dataset:")
print(df.min())

print("\nMaximum values in the dataset:")
print(df.max())

print("\nFirst few rows of the dataset:")
print(df.iloc[:5])
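# Optional: the three MOC-extremum functions above differ only in latitude,
# depth range, region index, and whether they take a max or a min. A minimal
# consolidation sketch follows; it assumes the same MOC file layout and is an
# illustration, not part of the original pipeline.
def get_moc_extremum(file_path, lat_deg, depth_range_m, transport_reg, take_max=True):
    ds = xr.open_dataset(file_path, decode_times=False)
    lat_aux_grid = ds['lat_aux_grid']
    moc_z = ds['moc_z']
    lat_idx = np.abs(lat_aux_grid.values - lat_deg).argmin()
    # moc_z is in centimetres, so convert the requested depths from metres
    i0 = np.abs(moc_z.values - depth_range_m[0] * 100).argmin()
    i1 = np.abs(moc_z.values - depth_range_m[1] * 100).argmin()
    data = ds['MOC'].isel(lat_aux_grid=lat_idx, moc_z=slice(i0, i1 + 1),
                          transport_reg=transport_reg, moc_comp=0)
    if take_max:
        value = data.max(dim='moc_z', skipna=True)
        depth = moc_z.isel(moc_z=data.argmax(dim='moc_z') + i0)
    else:
        value = data.min(dim='moc_z', skipna=True)
        depth = moc_z.isel(moc_z=data.argmin(dim='moc_z') + i0)
    return value.values, depth.values / 100  # depth in metres

# e.g. get_moc_extremum(amoc_file_path, 40, (500, 2000), transport_reg=1)
# would reproduce get_amoc_max_at_40N.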
### Fix SALT_150m ###
### Fix PD ###
import pandas as pd
import numpy as np
import xarray as xr

# Read a variable from a .nc file within a lat/lon box, optionally at a given
# depth (z_t is in centimetres) -- unchanged from the earlier pipeline
def read_nc_file(file_path, var_name, column_head, lat_range, lon_range, depth=None):
    try:
        dataset = xr.open_dataset(file_path, decode_times=False)
        data = dataset[var_name]
        if 'TLAT' in dataset:
            lat = dataset['TLAT']
            lon = dataset['TLONG']
        else:
            lat = dataset['lat']
            lon = dataset['lon']
        if 'z_t' in dataset.dims and depth is not None:
            z_t = dataset['z_t']
            depth_idx = np.abs(z_t.values - depth).argmin()
            data = data.isel(z_t=depth_idx)
        data = data.where((lat >= lat_range[0]) & (lat <= lat_range[1]) &
                          (lon >= lon_range[0]) & (lon <= lon_range[1]), drop=True)
        time = dataset['time']
        print(f"Data read successfully with {len(time)} time steps for {column_head}")
        return time, data
    except KeyError:
        print(f"Variable '{var_name}' not found in the file {file_path}.")
        return None, None
    except Exception as e:
        print(f"Failed to read the .nc file {file_path} with variable {var_name}: {e}")
        return None, None

# Preprocess the additional variables (spatial mean per time step)
def preprocess_additional_data(file_paths, var_names, column_heads, lat_ranges, lon_ranges, depths=None):
    all_data = []
    valid_var_names = []
    for file_path, var_name, column_head, lat_range, lon_range, depth in zip(
            file_paths, var_names, column_heads, lat_ranges, lon_ranges, depths):
        actual_var_name = var_name.split('_')[0]  # Strip the depth suffix (e.g. 'PD_0m' -> 'PD')
        time, data = read_nc_file(file_path, actual_var_name, column_head, lat_range, lon_range, depth)
        if time is not None and data is not None:
            valid_var_names.append(column_head)  # Append only if data is successfully read
            data = data.mean(dim=['nlat', 'nlon'] if 'nlat' in data.dims else ['lat', 'lon'],
                             skipna=True).values
            print(f"Processed data for {column_head}:")
            print(f"First five values: {data[:5]}")
            print(f"Shape of data: {data.shape}")
            print(f"Max value in {column_head}: {np.nanmax(data)}")
            print(f"Min value in {column_head}: {np.nanmin(data)}")
            all_data.append(data)
    if all_data:
        combined_data = np.vstack(all_data).T
        print(f"Combined data shape for {valid_var_names}: {combined_data.shape}")
        return combined_data, valid_var_names
    else:
        return None, None

# Define the additional variables to add; depths are in centimetres
# (15000 cm = 150 m, 20000 cm = 200 m)
additional_variables = [
    {'name': 'SALT', 'column_head': 'SALT_150m',
     'file_path': '/groups/ocean/qifan/annual_data/cesmi6gat31rblc230i/cesmi6gat31rblc230i_ocn_SALT_ANN.nc',
     'lat_range': (50, 70), 'lon_range': (300, 360), 'depth': 15000},
    {'name': 'PD_0m', 'column_head': 'PD_0m_0to50',
     'file_path': '/groups/ocean/qifan/annual_data/cesmi6gat31rblc230i/cesmi6gat31rblc230i_ocn_PD_ANN.nc',
     'lat_range': (0, 50), 'lon_range': (300, 360), 'depth': 0},
    {'name': 'PD_200m', 'column_head': 'PD_200m_0to50',
     'file_path': '/groups/ocean/qifan/annual_data/cesmi6gat31rblc230i/cesmi6gat31rblc230i_ocn_PD_ANN.nc',
     'lat_range': (0, 50), 'lon_range': (300, 360), 'depth': 20000}
]

# Extract file paths, variable names, column headers, lat/lon ranges, and depths
additional_file_paths = [var['file_path'] for var in additional_variables]
additional_var_names = [var['name'] for var in additional_variables]
additional_column_heads = [var['column_head'] for var in additional_variables]
additional_lat_ranges = [var['lat_range'] for var in additional_variables]
additional_lon_ranges = [var['lon_range'] for var in additional_variables]
additional_depths = [var['depth'] for var in additional_variables]

# Preprocess the additional variables
additional_data, additional_valid_var_names = preprocess_additional_data(
    additional_file_paths, additional_var_names, additional_column_heads,
    additional_lat_ranges, additional_lon_ranges, additional_depths
)

# Load the existing CSV file
csv_path = '/groups/ocean/qifan/draft/combined_dataset230i.csv'
existing_df = pd.read_csv(csv_path)

# Add the new columns to the existing DataFrame
if additional_data is not None:
    for i, col_name in enumerate(additional_valid_var_names):
        existing_df[col_name] = additional_data[:, i]

    # Save the updated DataFrame to a new CSV file
    updated_csv_path = '/groups/ocean/qifan/draft/combined_dataset_with_new_columns_230i.csv'
    existing_df.to_csv(updated_csv_path, index=False)

    # Print the first few rows of the updated dataset
    print(f"Updated dataset saved as {updated_csv_path}")
    print("First few rows of the updated dataset:")
    print(existing_df.head())
else:
    print("No additional data to add to the CSV.")
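# Optional: read_nc_file picks the model level nearest to the requested depth,
# so the level actually used can differ from the nominal one in the column
# name. A minimal verification sketch (an illustration, not part of the
# original pipeline) that reports the selected level:
def report_selected_level(file_path, depth_cm):
    ds = xr.open_dataset(file_path, decode_times=False)
    z_t = ds['z_t'].values  # z_t is in centimetres
    idx = int(np.abs(z_t - depth_cm).argmin())
    print(f"Requested {depth_cm / 100} m -> level {idx} at {z_t[idx] / 100} m")

# e.g. report_selected_level(additional_file_paths[0], 15000) would show which
# level feeds the SALT_150m column.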
# New combined dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load datasets
dataset1_path = "/groups/ocean/qifan/draft/combined_dataset_with_new_columns_185.csv"
dataset2_path = "/groups/ocean/qifan/draft/combined_dataset_with_new_columns_210.csv"
dataset3_path = "/groups/ocean/qifan/draft/combined_dataset_with_new_columns_220.csv"
dataset4_path = "/groups/ocean/qifan/draft/combined_dataset_with_new_columns_230i.csv"

dataset1 = pd.read_csv(dataset1_path)
dataset2 = pd.read_csv(dataset2_path)
dataset3 = pd.read_csv(dataset3_path)
dataset4 = pd.read_csv(dataset4_path)

# Remove the last row of each dataset
def remove_last_row(data):
    return data.iloc[:-1]

dataset1 = remove_last_row(dataset1)
dataset2 = remove_last_row(dataset2)
dataset3 = remove_last_row(dataset3)
dataset4 = remove_last_row(dataset4)

# Calculate and print summary statistics
def print_statistics(dataset, name):
    print(f"\nStatistics for {name}:")
    stats = dataset.describe()
    print(stats.loc[['min', 'max', 'mean', 'std']])

# Print statistics for each dataset
print_statistics(dataset1, 'Dataset 185')
print_statistics(dataset2, 'Dataset 210')
print_statistics(dataset3, 'Dataset 220')
print_statistics(dataset4, 'Dataset 230i')

# Combine datasets
combined_dataset = pd.concat([dataset1, dataset2, dataset3, dataset4]).reset_index(drop=True)

# Print statistics for the combined dataset
print_statistics(combined_dataset, 'Combined Dataset')

# Print the shape and first few lines of the combined dataset
print("\nCombined dataset shape:", combined_dataset.shape)
print("First few lines of the combined dataset:")
print(combined_dataset.head())

# Save the combined dataset to a CSV file
combined_dataset_path = "/groups/ocean/qifan/draft/CombinedDatasetLongUnsmoothNewColumns.csv"
combined_dataset.to_csv(combined_dataset_path, index=False)
print(f"Combined dataset saved at: {combined_dataset_path}")

# Plot the time series for AMOC
plt.figure(figsize=(17, 8))
plt.plot(combined_dataset.index, combined_dataset['AMOC'], label='AMOC', color='skyblue')
plt.xlabel('Time Index')
plt.ylabel('AMOC')
plt.title('Combined Time Series of AMOC')
plt.legend()
plt.grid(True)
plt.tight_layout()
combined_plot_path = "/groups/ocean/qifan/draft/CombinedLongTimeSeriesAMOC.png"
plt.savefig(combined_plot_path)
plt.show()
print(f"Combined time series plot saved at: {combined_plot_path}")
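# Optional: pd.concat takes the union of columns, so a column missing from any
# sub-dataset would silently become NaN in the combined output. A minimal
# sanity-check sketch (an illustration, not part of the original pipeline):
for name, d in [('185', dataset1), ('210', dataset2), ('220', dataset3), ('230i', dataset4)]:
    if list(d.columns) != list(dataset1.columns):
        print(f"Column mismatch in dataset {name}: {set(d.columns) ^ set(dataset1.columns)}")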