Jackknife

Jackknife method

Jackknife implementation

g_mk_jk(data_list, jk_idx_list, *[, avg])

Perform (randomized) Super-Jackknife for the Jackknife data set.

g_mk_jk_val(rs_tag, val, err, *, jk_type, ...)

Create a jackknife sample with random numbers based on central value val and error err.

g_jk_avg(jk_arr, **_kwargs)

Return avg of the jk_arr.

g_jk_err(jk_arr, *, eps, jk_type, **_kwargs)

Return err of the jk_arr.

g_jk_avg_err(jk_arr, **kwargs)

Return (avg, err,) of the jk_arr.

g_jk_avg_err_arr(jk_arr, **kwargs)

Return avg_err_arr of the jk_arr.

g_jk_size(*, jk_type, all_jk_idx, ...)

Return number of samples for the (randomized) Super-Jackknife data set.

g_jk_blocking_func(i, jk_idx, *, ...)

Return jk_blocking_func(jk_idx).

get_jk_state(*, jk_type, eps, n_rand_sample, ...)

Return the current jackknife state; currently only useful if the corresponding `default_g_jk_kwargs` options are set (summary truncated in the source documentation — verify against the API reference).

set_jk_state(state)

Utilities

average(data_list)

avg_err(data_list, *[, eps, block_size])

Compute (avg, err) of data_list.

err_sum(*vs)

e.g.: q.err_sum(1.4, 2.1, 1.0) ==> 2.7147743920996454

block_data(data_list, block_size[, ...])

Return the list of block averages. The blocks may overlap if `is_overlapping == True`.

fsqr(data)

Separately square real and imag part in case of complex types.

fsqrt(data)

Separately calculate the square root real and imag part in case of complex types.

jackknife(data_list, *[, eps])

Return `jk[i] = avg - (eps / N) * (v[i] - avg)`. The normal jackknife uses `eps = 1`; `eps` scales the fluctuation.

jk_avg(jk_arr)

jk_err(jk_arr, *[, eps, block_size])

Return err of the jk_arr.

jk_avg_err(jk_arr, *[, eps, block_size])

sjackknife(data_list, jk_idx_list, *[, avg, ...])

Super jackknife.

sjk_mk_jk_val(rs_tag, val, err, *[, ...])

Return `jk_arr` with `n = n_rand_sample` and `len(jk_arr) == 1 + n`, where `jk_arr[i] = val + err * r[i]` for `i` in `1..n` and `r[i] ~ N(0, 1)`.

sjk_avg(jk_arr)

sjk_err(jk_arr, *[, eps])

Return err of the jk_arr.

sjk_avg_err(jk_arr, *[, eps])

rjackknife(data_list, jk_idx_list, *[, avg, ...])

Jackknife-bootstrap hybrid resampling. Return `jk_arr` with `len(jk_arr) == 1 + n_rand_sample`; the distribution of `jk_arr` should be similar to the distribution of `avg`. The random coefficients satisfy `r_{i,j} ~ N(0, 1)`. If `is_normalizing_rand_sample`: `n_j = sum_i r_{i,j}^2` and `r_{i,j} <- sqrt(n_rand_sample / n_j) * r_{i,j}`. With `data_list_real = [d for d in data_list if d is not None]`, `data_arr = np.array(data_list_real, dtype=dtype)`, `avg = average(data_arr)`, and `n = len(data_list_real)`: `jk_arr[0] = avg` and `jk_arr[i] = avg + sum_{j=1}^{n} (-eps / sqrt(n * (n - b(i,j)))) * r_{i,j} * (data_list_real[j] - avg)`, where `b(i,j)` represents the block_size. If `jk_blocking_func` is provided, `jk_blocking_func(i, jk_idx) => blocked jk_idx` and `jk_arr[i] = avg + sum_{j=1}^{n} r_{i,jk_blocking_func(j)} * (jk_arr[j] - avg)`.

rjk_mk_jk_val(rs_tag, val, err, *[, ...])

Return `jk_arr` with `n = n_rand_sample` and `len(jk_arr) == 1 + n`, where `jk_arr[i] = val + err * r[i]` for `i` in `1..n` and `r[i] ~ N(0, 1)`.

rjk_avg(jk_arr)

rjk_err(jk_arr[, eps])

Return err of the jk_arr.

rjk_avg_err(rjk_list[, eps])

Example for the Jackknife-bootstrap hybrid method (described in the Jackknife method section): examples-py/jackknife-random.py

#!/usr/bin/env python3

# Example of the Jackknife-bootstrap hybrid ("rjk") resampling method.

import qlat as q
import numpy as np

q.begin_with_mpi()

# Configure the global jackknife state consumed by the q.g_* helpers below.
q.default_g_jk_kwargs["jk_type"] = "rjk"
q.default_g_jk_kwargs["eps"] = 1
q.default_g_jk_kwargs["n_rand_sample"] = 1024
q.default_g_jk_kwargs["is_normalizing_rand_sample"] = False
q.default_g_jk_kwargs["is_apply_rand_sample_jk_idx_blocking_shift"] = True
q.default_g_jk_kwargs["block_size"] = 1
q.default_g_jk_kwargs["block_size_dict"] = {
    "job_tag_1": 1,
    "job_tag_2": 4,
}
q.default_g_jk_kwargs["rng_state"] = q.RngState("rejk")
q.default_g_jk_kwargs["all_jk_idx_set"] = set()

def record_avg_err(avg, err):
    # Append every component of avg and err to the JSON results log.
    for idx, a in enumerate(avg):
        q.json_results_append(f"avg[{idx}]", a)
        q.json_results_append(f"err[{idx}]", err[idx])

# First ensemble: 20 trajectories tagged "job_tag_1".
rng = q.RngState("seed1")
tag = "job_tag_1"
trajs = list(range(20))

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_1 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
record_avg_err(*q.g_jk_avg_err(jk_arr_1))

# Second ensemble: 30 trajectories tagged "job_tag_2".
rng = q.RngState("seed2")
tag = "job_tag_2"
trajs = list(range(30))

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_2 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
record_avg_err(*q.g_jk_avg_err(jk_arr_2))

# Jackknife samples from different ensembles can be combined directly.
jk_arr = jk_arr_1 + jk_arr_2
record_avg_err(*q.g_jk_avg_err(jk_arr))

# A jackknife sample built from a central value and its error only.
jk_val_arr = q.g_mk_jk_val("val-tag", 1.0, 0.5)
avg, err = q.g_jk_avg_err(jk_val_arr)

q.json_results_append(f"avg", avg)
q.json_results_append(f"err", err)

# The error of the difference is propagated through the jackknife samples.
jk_diff_arr = jk_arr - jk_val_arr[:, None]
record_avg_err(*q.g_jk_avg_err(jk_diff_arr))

q.check_log_json(__file__, check_eps=1e-10)
q.end_with_mpi()
q.displayln_info(f"CHECK: finished successfully.")

Example for the conventional Super-Jackknife method: examples-py/jackknife-super.py

#!/usr/bin/env python3

import qlat as q
import numpy as np
import functools

# Initialize MPI before any qlat operations.
q.begin_with_mpi()

# The ensembles ("job tags") combined in this super-jackknife example.
job_tag_list = ['job_tag_1', 'job_tag_2', ]

@functools.lru_cache
def get_traj_list(job_tag):
    """Return the list of trajectory indices for `job_tag`.

    Raises an Exception for unknown job tags. Results are memoized via
    `functools.lru_cache`.
    """
    fname = q.get_fname()
    if job_tag == "job_tag_1":
        return list(range(20))
    elif job_tag == "job_tag_2":
        return list(range(30))
    else:
        # Every branch above returns or raises, so no trailing return is
        # needed (the original `return None` after this raise was unreachable).
        raise Exception(f"{fname}: job_tag='{job_tag}'")

@functools.lru_cache
def get_all_jk_idx():
    """Return ['avg'] followed by every (job_tag, traj) pair."""
    idx_list = ['avg', ]
    idx_list.extend(
        (job_tag, traj,)
        for job_tag in job_tag_list
        for traj in get_traj_list(job_tag)
    )
    return idx_list


# Configure the global jackknife state for the conventional super-jackknife.
q.default_g_jk_kwargs["jk_type"] = "super"
q.default_g_jk_kwargs["eps"] = 1
q.default_g_jk_kwargs["is_hash_jk_idx"] = True
q.default_g_jk_kwargs["jk_idx_hash_size"] = 1024
q.default_g_jk_kwargs["block_size"] = 1
q.default_g_jk_kwargs["block_size_dict"] = {
    "job_tag_1": 1,
    "job_tag_2": 4,
}
q.default_g_jk_kwargs["rng_state"] = q.RngState("rejk")
q.default_g_jk_kwargs["all_jk_idx"] = None
q.default_g_jk_kwargs["get_all_jk_idx"] = get_all_jk_idx
q.default_g_jk_kwargs["all_jk_idx_set"] = set()

def log_avg_err(avg, err):
    # Record every component of avg and err in the JSON results log.
    for idx, a in enumerate(avg):
        q.json_results_append(f"avg[{idx}]", a)
        q.json_results_append(f"err[{idx}]", err[idx])

# First ensemble.
rng = q.RngState("seed1")
tag = "job_tag_1"
trajs = get_traj_list(tag)

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_1 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
log_avg_err(*q.g_jk_avg_err(jk_arr_1))

# Second ensemble.
rng = q.RngState("seed2")
tag = "job_tag_2"
trajs = get_traj_list(tag)

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_2 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
log_avg_err(*q.g_jk_avg_err(jk_arr_2))

# Jackknife samples from different ensembles combine by addition.
jk_arr = jk_arr_1 + jk_arr_2
log_avg_err(*q.g_jk_avg_err(jk_arr))

# A jackknife sample generated from a central value and its error.
jk_val_arr = q.g_mk_jk_val("val-tag", 1.0, 0.5)
avg, err = q.g_jk_avg_err(jk_val_arr)

q.json_results_append(f"avg", avg)
q.json_results_append(f"err", err)

# The error of the difference is propagated through the jackknife samples.
jk_diff_arr = jk_arr - jk_val_arr[:, None]
log_avg_err(*q.g_jk_avg_err(jk_diff_arr))

q.check_log_json(__file__, check_eps=1e-10)
q.end_with_mpi()
q.displayln_info(f"CHECK: finished successfully.")

Example for a variant of the conventional Super-Jackknife method: examples-py/jackknife-super-hash.py

#!/usr/bin/env python3

# Example of the hash-index variant of the super-jackknife method.

import qlat as q
import numpy as np

q.begin_with_mpi()

# Global jackknife settings: hashed jk indices, no explicit index list.
q.default_g_jk_kwargs["jk_type"] = "super"
q.default_g_jk_kwargs["eps"] = 1
q.default_g_jk_kwargs["is_hash_jk_idx"] = True
q.default_g_jk_kwargs["jk_idx_hash_size"] = 1024
q.default_g_jk_kwargs["block_size"] = 1
q.default_g_jk_kwargs["block_size_dict"] = {
    "job_tag_1": 1,
    "job_tag_2": 4,
}
q.default_g_jk_kwargs["rng_state"] = q.RngState("rejk")
q.default_g_jk_kwargs["all_jk_idx"] = None
q.default_g_jk_kwargs["get_all_jk_idx"] = None
q.default_g_jk_kwargs["all_jk_idx_set"] = set()

def append_avg_err(avg, err):
    # Record each component of avg and err in the JSON results log.
    for idx, a in enumerate(avg):
        q.json_results_append(f"avg[{idx}]", a)
        q.json_results_append(f"err[{idx}]", err[idx])

# First ensemble: 20 trajectories tagged "job_tag_1".
rng = q.RngState("seed1")
tag = "job_tag_1"
trajs = list(range(20))

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_1 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
append_avg_err(*q.g_jk_avg_err(jk_arr_1))

# Second ensemble: 30 trajectories tagged "job_tag_2".
rng = q.RngState("seed2")
tag = "job_tag_2"
trajs = list(range(30))

samples = rng.g_rand_arr((len(trajs), 5,))  # can be list or np.array
jk_arr_2 = q.g_mk_jk(samples, [(tag, t) for t in trajs])
append_avg_err(*q.g_jk_avg_err(jk_arr_2))

# Jackknife samples from different ensembles combine by addition.
jk_arr = jk_arr_1 + jk_arr_2
append_avg_err(*q.g_jk_avg_err(jk_arr))

# A jackknife sample built from a central value and its error.
jk_val_arr = q.g_mk_jk_val("val-tag", 1.0, 0.5)
avg, err = q.g_jk_avg_err(jk_val_arr)

q.json_results_append(f"avg", avg)
q.json_results_append(f"err", err)

# The error of the difference is propagated through the jackknife samples.
jk_diff_arr = jk_arr - jk_val_arr[:, None]
append_avg_err(*q.g_jk_avg_err(jk_diff_arr))

q.check_log_json(__file__, check_eps=1e-10)
q.end_with_mpi()
q.displayln_info(f"CHECK: finished successfully.")