######################################################################################
################################ EnSRF_switches.R ####################################
######################################################################################

expname="assim_ghcn_d_better_precip_R30_timeloc"
version="v1.3" # set code version number for experiment and netcdf file names
# v1.1 at DKRZ is experiment 1.2 here!

#####################################################################################
# general switches
#####################################################################################

fsyr=1602 # full period start year for data generation that allows 71-yr anom. calc.
feyr=2004
syr_cru=1901
eyr_cru=2004
syr_recon=1750
eyr_recon=1900
syr_twentycr=1901 # currently statistics only work for identical CRU and 20CR periods (20CR data actually start in 1850)
eyr_twentycr=2004
#syr_ind=1901
#eyr_ind=2004

#!!! ATTENTION: the sixmonstatevector year starts in October of the previous year (cyr-1)
sixmonstatevector=T # 6 months of data in the state vector for the real-proxy multiple
                    # regression approach. ATTENTION: monthly has to be TRUE
                    # (an illustrative month-to-season helper is sketched at the end of this section)
if (sixmonstatevector) {
  s <- 2 # only 2 seasons to calculate, but monthly data still yield a long state vector
} else {
  s <- 12
}
season=c(3,9) # end months of the seasons: 3 = Oct-Mar, 9 = Apr-Sep; alternative: season=c(2,5,8,11)
nmem=30 # number of ensemble members
#####################################################################################
# EnSRF_data switches
#####################################################################################

# load or generate data from scratch
# generate MODEL data in .Rdata format
generate_ECHAM=F # if TRUE -> orig. echam data is read
generate_ECHAM_103=F # ECHAM ens. mem. 103 with corrected land use forcing
generate_ECHAM_covar=F # generate ECHAM all-time-step array for long-term covariance
generate_ECHAM_anom=F # read echam anom, clim and sd from cdo
# ATTENTION: if echam=T the proxy/instr. data have to be generated, too

# old switches, not working anymore
#generate_ECHAM_1901_70=F # ECHAM data for bias calc with real_prox data
#generate_NCEP=F # generate NCEP/NCAR reanalysis for independent verification

# generate VALIDATION data in .Rdata format
# next line not included yet:
generate_20CR=F # generate 20CR reanalysis for independent verification
generate_CRUALLVAR=F # if FALSE -> cru_allvar.Rdata
generate_HadCRU4=F # HadCRU ens. SD for instr. uncertainty and error-spread ratio
generate_LUTPAULKUT=F # gridded seasonal recons (1750-1999)
generate_ind_recon=F # read Stefan's indices 1900-2000 from .txt to .RData
# generate INPUT/ASSIMILATION data in .Rdata format
# use scripts in data_yuri to generate .Rdata files
generate_t_yuri=F # if TRUE -> Yuri's temp. data collection including HISTALP is read
generate_slp_yuri=F # if TRUE -> Yuri's slp data collection is read
statyr=1905 # year after which the GHCN/ISTI station network is kept constant (alternatives: 1941, 1850/69)
generate_GHCN=F # if TRUE -> orig. GHCN station data is read
generate_GHCN_precip=F # if FALSE -> ghcn.Rdata
generate_DOCUM=F # if TRUE -> Yuri's documentary data collection is read
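# ----------------------------------------------------------------------------------
# Illustrative sketch (not from the original code): how season end months such as
# season=c(3,9) can map a calendar month to its season index, with months after the
# last end month wrapping into season 1 of the next state-vector year (Oct of cyr-1
# through Mar of cyr). The helper name and interface are hypothetical.
month_to_season <- function(month, season_ends = c(3, 9)) {
  idx <- findInterval(month, season_ends + 1) + 1
  if (idx > length(season_ends)) idx <- 1  # wrap: Oct-Dec belong to the Oct-Mar season
  idx
}
# e.g. month_to_season(10) -> 1 (Oct-Mar), month_to_season(4) -> 2 (Apr-Sep)
# ----------------------------------------------------------------------------------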
# do we still need the next 3 lines? why after generate_proxies in EnSRF_generate?
generate_PAGES=F # using the screened PAGES proxy dataset
# CODE from Roni missing to convert csv to RData
generate_NTREND=F # CODE from Roni missing to convert csv to RData
# REMEMBER to use the 12-month temp. forward model and yearly output for the next switch
generate_PSEUDO=F # DAPS PSEUDO PROXY EXPERIMENT: set pseudo_prox to T further down (approx. line 145)
# if F but pseudo_prox below is T, then the .RData file is loaded


# At the moment: any combination of read.these should be possible
# To do: still set up NTREND with regression_months etc. - done, but the NTREND proxies are no longer in the directory
# To test: different regression_months with and without T/P etc.
# In read_pages the directory of the PAGES proxies still needs to be generalized
# To clarify: in each read_proxy_mxd, is e.g. t4 always counted from t1-t12? Otherwise
# the colnames do not make sense! E.g. line:
# Because in PAGES they are not the same

generate_PROXIES=F
# for multiple parallel runs generate_PROXIES only has to be TRUE for the first run
# once the files are created they can be loaded with this switch = F
# NOTE: the proxy data set switches need to remain, to automatically set real_prox=T in the EnSRF data script
#if (generate_PROXIES==T) {
# You can choose any combination of months and variables (T & P) for regression_months.
# Then you can choose for each source whether it is to be included or not.
# The resulting realprox$mr is a matrix of dimension [1:x,1:25], where x depends on your chosen sources.
# The 25 originates from 12 months for T and 12 for P, plus the intercept.
# The columns that were not chosen remain NA.
# For MXD and SCHWEINGR only the temperature months are used; the precip. months stay NA.
# PAGES tree data also include locations in the SH: if e.g. t4 is chosen, t10 (t4+6) is used
# for any location with lat<0.
# (a minimal fitting sketch in this layout follows this section)

regression_months = c('t.first', 't.second','t.third','t.fourth','t.fifth','t.sixth')
#regression_months = c('t.first', 't.second','t.third','t.fourth','t.fifth','t.sixth',
#                      'p.first', 'p.second','p.third','p.fourth','p.fifth','p.sixth')

#### PROXIES ####
TRW=F # 35 best TRW records from Petra's collection
MXD=F # additional MXD, not the gridded Schweingruber data
SCHWEINGR=F # Schweingruber/Briffa MXD grid
NTREND=F # NTREND best tree data, version 2018 (identical with the 2015 paper)
PAGES=F # PAGES database version 2 from 01/2018
TRW_PETRA=F # all TRW series from the ITRDB, recalibrated by Petra
#################



# ATTENTION: the precip. forward model only works with CRU and NOT GISS
lm_fit_data = "GISS" # can be CRU or GISS to calculate the reg. coefficients (GISS is land+ocean temp only)
type = c("coral") # only works with tree and coral (and each individually as well)
#}
# END if generate_PROXIES
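# ----------------------------------------------------------------------------------
# Illustrative sketch (hypothetical names, assumed data layout): fitting one proxy
# forward model for a chosen set of regression months and storing the coefficients
# in the 25-column layout described above (t1..t12, p1..p12, intercept); columns of
# months/variables not used in the regression stay NA.
fit_proxy_model <- function(prox, clim, months = paste0("t", 4:9)) {
  # prox: proxy time series; clim: data.frame with (a subset of) columns t1..t12, p1..p12
  fit <- lm(reformulate(months, response = "prox"), data = cbind(prox = prox, clim))
  mr <- setNames(rep(NA_real_, 25), c(paste0("t", 1:12), paste0("p", 1:12), "intercept"))
  mr[months] <- coef(fit)[months]
  mr["intercept"] <- coef(fit)["(Intercept)"]
  mr
}
# ----------------------------------------------------------------------------------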
# Nevin: May 2018
####### SCREENING FOR PROXIES ##########
# (for now this only works for temperature)
# Only one of AIC and PVALUE can be TRUE; if both are set to TRUE, only the AIC part is run.
# If neither AIC nor PVALUE is TRUE, the full model is used without screening.

AIC=T # calculates linear regression models for different continuous
# subperiods and takes the best one according to the AIC value;
# furthermore, it only keeps the significant models with p-value < alpha (alpha set below).
# Example: T1-T6 AIC = 9, T2-T4 AIC = -1 (all combinations are considered)
# => smallest AIC => best model => if not significant => tree excluded
# (a subperiod-screening sketch follows at the end of this section)

PVALUE=T # calculates only the full regression model and keeps only the significant ones (p-value < alpha)
alpha=0.05 # significance level, default: 0.05 or 0.01
avg_realprox_per_grid=F # if more than one tree is situated in one ECHAM grid cell, an average
# of all tree-ring widths is calculated before making the regression model. Because of the independent
# loading of the different datasets (ntrend, pages, petra), the average is only calculated from
# trees of the same dataset -> if all 3 datasets are used, it can occur that still 3 averaged
# trees are in one grid box.


#### Instrumental Data ####
old_statvec = F
new_statvec = T # has +: wetdays, block, cycfreq; -: v200, t500
yuri_temp=F # Yuri's data compilation, SLP always loaded
yuri_slp=F
inst_slp_err = sqrt(10) # instrumental slp error (10 is the variance of the slp error)
ghcn_temp=F
inst_t_err = sqrt(0.9) # instrumental temp error (0.9 is the variance of the temp error)
isti_instead_ghcn=F # switch from GHCN to ISTI (ghcn_temp must still be set to TRUE)
ghcn_prec=T
ghcn_p_err = 0.3 # relative error (based on the US-station estimation it should be 30%)
ghcn_p_min = 10 # minimum error 10 mm
precip_ratio= F # if T, assimilate the ratio; if F, assimilate the difference
gauss_ana =F # use Gaussian anamorphosis for the precipitation ratio
check_norm = F # check whether the GA-transformed values are normally distributed and use only those that are
ghcn_wday =T # assimilate wetdays calculated from daily GHCN precip data
ghnc_w_err = 2 # error in number of days (based on the US-station estimation it should be 2 days)
#### Documentary Data ####
import_luca=F # new docu data; if it is T then the docum part is T
docu_err= sqrt(0.25) # equals 0.5 std. dev.
#### Pseudo Proxy Data ####
pseudo_prox=F # use DAPS pseudo-proxies and annual resolution Jan-Dec
if (pseudo_prox) {
  if (generate_PROXIES==T) {stop("pseudo_prox and generate_PROXIES cannot be used together")}
  sixmonstatevector=F # using the annual mean
  s <- 1
  season=12 # year from Jan-Dec
  print("ATTENTION: sixmonstatevector has been set to FALSE for annual resolution pseudoproxy experiment!")
}
# all available data selected above are automatically switched on when available in EnSRF_data

# if (generate_PROXIES){
#   if ((generate_PAGES & PAGES) | (generate_NTREND & NTREND) | (trw_only) | (mxd_only) | (schweingr_only)){
#     stop("WARNING! These switches should not be set to TRUE simultaneously:
#          generate_PAGES & PAGES
#          generate_NTREND & NTREND
#          generate_PROXIES & trw_only
#          generate_PROXIES & mxd_only
#          generate_PROXIES & schweingr_only
#          ")
#   }
# }

#################
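# ----------------------------------------------------------------------------------
# Illustrative sketch (assumed mechanics, hypothetical names) of the AIC screening
# described above: fit the regression on every contiguous month window (T1-T6,
# T2-T4, ...), keep the window with the smallest AIC, and exclude the proxy if the
# best model is not significant at level alpha.
screen_proxy_aic <- function(prox, clim, months = paste0("t", 1:6), alpha = 0.05) {
  best <- NULL
  for (i in seq_along(months)) {
    for (j in i:length(months)) {  # all contiguous subperiods months[i..j]
      fit <- lm(reformulate(months[i:j], response = "prox"), data = cbind(prox = prox, clim))
      if (is.null(best) || AIC(fit) < AIC(best)) best <- fit
    }
  }
  f <- summary(best)$fstatistic    # overall F-test of the best model
  pval <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  if (pval < alpha) best else NULL # NULL -> proxy excluded
}
# ----------------------------------------------------------------------------------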
# To use a bigger ensemble for the background
no_forc_big_ens= F # use all years as one big ensemble regardless of forcing, like LMR
                   # ONLY works with the option load_71yr_anom=T further down
covarclim=0 # set 50 or 100 [%]: how much ECHAM climatology covariance should be used
            # default=0, i.e. current-year covariance from the ECHAM ensemble
cov_inflate = F # inflate the Pb matrix
inflate_fac = 1.02 # the factor of covariance inflation
# Only used if no_forc_big_ens=T or covarclim>0
state = "changing" # can be "static" or "changing" (static = the same big ensemble is used for all years, changing = it is recalculated for every year)
n_covar=250 # sample size for the covariance calc or for the no_forc LMR-like experiment, e.g. 250 or 500
PHclim_loc = F # whether to localize PHclim; only works if covarclim > 0
PHclim_lvec_factor = 2 # if PHclim_loc=T, use e.g. 2x the distances of the 30-member case; at the moment only works for shape_wgt="circle"
mixed_loc = F # first combine Pb and Pclim, then localize
update_PHclim = F # whether PHclim should be updated when assimilating observation by observation
save_ananomallts = F # in the covarclim exps, if the climatology part is updated: whether to save the "climatological" analysis or not
if((covarclim>0)&no_forc_big_ens){
  stop("Warning: covarclim>0 and no_forc_big_ens are both TRUE: these two experiments cannot be combined")
}
# Calculate decorrelation length -> was done already
calc_decorr_dist=F # calculate the decorrelation distance for each variable from ECHAM to set L
region = "global" # region over which the decorrelation length is calculated
                  # default = "global"
                  # can select: "global", "ENH", "ESH", "tropics", "lat_band", "lon_band"
cor_length_period = "annual" # period over which the decorrelation length is calculated
                  # default = "annual"
                  # can select: "annual", "summer", "winter"
# the corr_over_region function needs both region and cor_length_period
# the compute_dist_2d function needs region

# Localizing the 30 ensemble members: distance and shape
# (a Gaspari-Cohn taper sketch follows this section)
loc=T # T = WITH localization, F = without
if (loc) {
  l_dist_temp2=1000*1.5 # factor *1.5 following Stefan's recommendation
  l_dist_slp=1800*1.5
  l_dist_precip=300*1.5
  #l_dist_precip=900
  l_dist_gph500=1800*1.5
  l_dist_gph100=2500*1.5
  l_dist_u850=1200*1.5
  l_dist_u200=1200*1.5
  l_dist_v850=1000*1.5
  l_dist_v200=1000*1.5
  l_dist_omega500=300*1.5
  # l_dist_t850=1000*1.5 # Roni: ECHAM has no t850 but t500; they refer to the same level, but I forgot which one is correct
  l_dist_t500=1000*1.5
  l_dist_ind=999999 # precalculated indices, should be removed
  l_dist_wdays = 300*1.5
  #l_dist_wdays = 900
  l_dist_blocks = 1800*1.5
  l_dist_cycfreq = 1800*1.5
} else {
  l_dist_temp2=999999
  l_dist_slp=999999
  l_dist_precip=999999
  l_dist_gph500=999999
  l_dist_gph100=999999
  l_dist_u850=999999
  l_dist_u200=999999
  l_dist_v850=999999
  l_dist_v200=999999
  l_dist_omega500=999999
  l_dist_t500=999999
  # l_dist_t850=999999 # see above
  l_dist_ind=999999
}
shape_wgt = "circle" # can be "circle" or "ellipse", depending on how the localization should be done
                     # default is "circle"
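# ----------------------------------------------------------------------------------
# Illustrative sketch: a Gaspari-Cohn (1999) 5th-order taper, a common choice for
# turning the cut-off distances above into covariance localization weights. Whether
# this code uses exactly this function or a simple circular cut-off is an assumption.
gaspari_cohn <- function(d, c) {
  # d: distance(s); c: localization half-width; weights reach 0 at 2c
  r <- d / c
  ifelse(r <= 1,
         -0.25*r^5 + 0.5*r^4 + 0.625*r^3 - (5/3)*r^2 + 1,
         ifelse(r <= 2,
                (1/12)*r^5 - 0.5*r^4 + 0.625*r^3 + (5/3)*r^2 - 5*r + 4 - 2/(3*r),
                0))
}
# e.g. gaspari_cohn(dist_km, l_dist_temp2) for temperature covariances
# ----------------------------------------------------------------------------------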
# ATTENTION: landcorrected only works with anomaly_assim==T and every2grid==T!!!
landcorr = F # use the simulation WITHOUT the land use bug if TRUE
# how to treat multiple input series in the same grid box
best2worst=T # order proxies from best first to worst last based on residuals
             # if FALSE then worst first and best last
best_prox_per_grid=F # use only the one real proxy record with the lowest residuals per grid cell
bestproxres=0.1 # generates a lon/lat grid with the set resolution; 0.1 means only the best proxy per ~10 km grid cell
first_inst_per_grid=F # first instrumental observation per ECHAM grid box
# ATTENTION: only this or avg_obs_per_grid (two options below) can be TRUE
# "DON'T USE FIRST_INST_PER_GRID IF YOU WANT TO INCLUDE REAL PROXY DATA AND
# INSTRUMENTALS AT THE SAME TIME"
firstinstres=1 # grid resolution for instr. stations (5 = echamgrid/5)
avg_obs_per_grid=T # average more than one observation per ECHAM grid box
                   # and calc the proxy vs echam correlation
ins_tim_loc = T # whether the instrumental obs should be localized in time or not
instmaskprox=F # remove proxy data from grid boxes that have instr. data
reduced_proxies=F # use every ??th (see code below) proxy record
every2grid=T # only use every third grid cell of ECHAM, CRU validation, ...
land_only=F # calc on land only
tps_only=T # only use temp, precip and slp in the state vector, remove other vars
tpsw_only=F # only use temp, precip, slp and wetdays in the state vector, remove other vars
no_stream=F # all echam vars but the stream function, as there is a problem with
            # 5/9 levels, which are in the lat dimension before and after 1880
loo=F # leave-one-out validation
if (loo) {tps_only=T;no_stream=F} # reduce the state vector for faster validation
#load_71yr_anom=T # load 71yr echam anomalies calculated with cdo
#anom_reload=F # reload anom calculated in R (next option)
#anom_save=F # save anom calculated in R to reload next time
#if (load_71yr_anom==T) {
#  anom_reload=F
#  anom_save=F}
check_assimdata=T # screen assimilation data before using it
if (sum(no_stream, tps_only, tpsw_only) != 1) {
  stop("Exactly one of no_stream, tps_only and tpsw_only has to be TRUE!")
}
# other options
scaleprox=T # scale standardized docu to the echam variance at the location
            # not necessary for proxy data because of the regression model
anomaly_assim=T # work with anomalies to avoid a reg. constant in the state vector
# nseas <- 12 # year with 12 months
check_dist=F # test for the ideal cut-off distance of spatial correlations
#H_non_lin=F # new H operator that also allows non-linear functions
ana.enssize=F
#NCEP_SOCOL=F

# choose validation data sets saved in the analysis step (EnSRF_data):
# (all three can be selected simultaneously)
vali_cru=T
vali_twentycr=F
vali_recon=F
#####################################################################################
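# ----------------------------------------------------------------------------------
# Illustrative sketch of the serial EnSRF update these switches configure, in the
# standard Whitaker & Hamill (2002) square-root form: one observation at a time,
# mean and perturbations updated deterministically. Names and the localization
# argument are hypothetical; this is a sketch, not the project's exact implementation.
ensrf_update_one_obs <- function(X, hx, y, r_var, loc_wgt = 1) {
  # X: state ensemble [nstate x nmem]; hx: ensemble of model equivalents of the obs
  # y: observed value; r_var: obs error variance; loc_wgt: localization weights
  m    <- ncol(X)
  xbar <- rowMeans(X); Xp <- X - xbar
  hbar <- mean(hx);    hp <- hx - hbar
  hpht <- sum(hp^2) / (m - 1)                     # HPbH': obs-space ensemble variance
  pht  <- drop(Xp %*% hp) / (m - 1)               # PbH':  state-obs covariances
  K    <- loc_wgt * pht / (hpht + r_var)          # localized Kalman gain
  alph <- 1 / (1 + sqrt(r_var / (hpht + r_var)))  # square-root reduction factor
  xbar <- xbar + K * (y - hbar)                   # update the ensemble mean
  Xp   <- Xp - alph * outer(K, hp)                # update the perturbations
  Xp + xbar                                       # analysis ensemble
}
# ----------------------------------------------------------------------------------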
#####################################################################################
# prepare plot switches
#####################################################################################

validation_set=c("cru_vali") # can be set to cru_vali or twentycr_vali, or
# both together: c("cru_vali","twentycr_vali")
# chooses which validation set is used in the postprocessing and plots
monthly_out=F # if sixmonstatevector=T, output is backtransformed to seasonal
# yearly_out is an old switch from Nevin; annual output is produced automatically if pseudo_prox=T
yearly_out=F # if both are FALSE, seasonal averages are calculated for the plots,
             # or monthly data are used if monthly_out=T
temporal_postproc=F # save half-year averages calculated from monthly data into the /prepplot folder
mergetime_indices=T # if TRUE: indices are combined to allts variables for the whole period
# (e.g. also for 1604-2004) and saved into the image folder for TS plots
# run the next option "load_prepplot" for the entire validation period, usually
# 1902-2003, because it creates time series
mergetime_fields=T # if calc_prepplot has been run, load_prepplot can be used
# saves an image and only needs to be run once; afterwards set "load_images=T"
load_images=F # directly load the image for the syr-eyr period: 1902-2001 or 1651-1750 image
# of merged indices and fields. NOT needed if it was just calculated
calc_vali_stat=T # calculate validation statistics after preparation (set "load_image=T")
CRPS=F # calculate the Continuous Ranked Probability Score (a formula sketch is at the end of this file)
ind_anom=T # calculate indices from anomaly data
vali_plots=T # source the EnSRF_plots.R script
#ind_ECHAM=T # delete/comment code in the prepplot script and then delete the switches here
#ind_recon=F # delete/comment code in the prepplot script and then delete the switches here


write_coor=F # write ascii files with the assimilated stations and data per time step
# maybe change the file names for the new EKF400 version "1.0" to "1.1"
# write_netcdf requires calc_postproc to be run before
# best set mergetime_*=F and load_image=F
write_netcdf=F # write the entire EKF400 to NetCDF files
if (!monthly_out & write_netcdf) {
  write_netcdf=F
  print('ATTENTION: write_netcdf set to FALSE because monthly_out=F')
}
#####################################################################################



#####################################################################################
# plot switches
#####################################################################################

monthly=F
station_yr=1905 # specific year to plot the station locations (syr <= station_yr <= eyr)
#pseudoproxy=F # old pseudoproxy try from Nevin
#plot_dweights=F
#write_nc=F
#recalc <- F
#reload <- F
plstat <- NULL # NULL or "calibrate"
countseries <- T
#PAGES <- F # write output for the PAGES paper

# -----------------------------------------------------------------------------------------
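# ----------------------------------------------------------------------------------
# Illustrative sketch for the CRPS switch above: the standard closed form of the
# Continuous Ranked Probability Score for an m-member ensemble and one observation,
# CRPS = mean|x_i - y| - (1/(2 m^2)) * sum_ij |x_i - x_j|. Hypothetical helper name.
crps_ensemble <- function(ens, obs) {
  m <- length(ens)
  mean(abs(ens - obs)) - sum(abs(outer(ens, ens, "-"))) / (2 * m^2)
}
# ----------------------------------------------------------------------------------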