pesky-fieldmaps/peg_fieldmaps.py

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "numpy>=2.4.4",
# ]
# ///
"""Fix-up IntendedFor to match original intentions

The original intention was for each field map to be used with one func
acquisition, but it is difficult to specify such pre-set mapping in
heudiconv. The original conversion was done with heudiconv's Shims &
ImagingVolume strategies. This setting guarantees that the images
which get paired are compatible, but it usually does not use all field
map runs, leading to a perceived impression of randomness. Sometimes,
the time between acquisitions caused one field map to be used for two
func acquisitions (with criterion set to Closest, heudiconv compares
start time and prefers closest), sometimes (rarely) the shims really
did not match.

This script modifies the sidecar JSONs and assigns the field maps on a
one-to-one basis, as long as the shims match (we forgo checking NIfTI
headers for dimensions) and the entire session has the expected set of
acquisitions.

We report the changed (or untouched) files with emojis to provide a
quick overview. For the core logic, see fixup_session() and read from
there.

"""

import argparse
import json
from pathlib import Path
import re

import numpy as np
import numpy.typing as npt


def json_dumps_pretty(j: dict, indent: int = 2, sort_keys: bool = True) -> str:
    """Given a json structure, pretty print it by colliding numeric arrays
    into a line.

    If resultant structure differs from original -- throws exception.

    Copied & minimally changed from heudiconv 1.4.0 (utils.py).
    https://github.com/nipy/heudiconv/

    Copyright HeuDiConv developers; licensed under the Apache License,
    Version 2.0. http://www.apache.org/licenses/LICENSE-2.0

    """
    js = json.dumps(j, indent=indent, sort_keys=sort_keys)
    # trim away \n and spaces between entries of numbers
    js_ = re.sub(
        '[\n ]+("?[-+.0-9e]+"?,?) *\n(?= *"?[-+.0-9e]+"?)',
        r" \1",
        js,
        flags=re.MULTILINE,
    )
    # uniform no spaces before ]
    js_ = re.sub(r" *\]", "]", js_)
    # uniform spacing before numbers
    # But that thing could screw up dates within strings which would have 2 spaces
    # in a date like Mar  3 2017, so we do negative lookahead to avoid changing
    # in those cases
    # import pdb; pdb.set_trace()
    js_ = re.sub(
        r"(?<!\w{3})"  # negative lookbehind for the month
        r'  *("?[-+.0-9e]+"?)'
        r"(?! [123]\d{3})"  # negative lookahead for a year
        r"(?P<space> ?)[ \n]*",
        r" \1\g<space>",
        js_,
    )
    # no spaces after [
    js_ = re.sub(r"\[ ", "[", js_)
    # the load from the original dump and reload from tuned up
    # version should result in identical values since no value
    # must be changed, just formatting.
    j_just_reloaded = json.loads(js)
    j_tuned = json.loads(js_)

    assert j_just_reloaded == j_tuned, (
        "Values differed when they should have not. "
        "Report to the heudiconv developers"
    )

    return js_


def save_pretty_json(filename: Path, data: dict) -> None:
    j = json_dumps_pretty(data)
    with filename.open("w") as fp:
        fp.write(j)


def ss_labels(ss_dir: Path) -> tuple[str, str]:
    subject_label = ss_dir.parent.name.split("-")[-1]
    session_label = ss_dir.name.split("-")[-1]
    return subject_label, session_label


def is_model_dir(ss_dir: Path) -> bool:
    """Check if the layout matches expectations and files are available

    Tests sidecars. Checks whether sidecars for all three func runs
    and their respective field maps are present and available. Returns
    False and prints a message if there are more or less acquistitions
    in func or fmap, or some of the expected sidecars are not
    available. Is oblivious to the existence of DWI-intended field
    maps (those with acq in name).

    This is done with the conviction that it only makes sense to try
    and enforce the preset mapping for a "standard" scanning session,
    and any outliers are better inspected manually. Checking file
    presence at this stage relieves us from having to handle read
    errors on further processing.

    """
    subject, session = ss_labels(ss_dir)
    expected_sidecars = set()
    for run, task in ((1, "rest"), (2, "restvidn"), (3, "restvida")):
        expected_sidecars |= {
            ss_dir / "fmap" / f"sub-{subject}_ses-{session}_dir-ap_run-{run}_epi.json",
            ss_dir / "fmap" / f"sub-{subject}_ses-{session}_dir-pa_run-{run}_epi.json",
            ss_dir / "func" / f"sub-{subject}_ses-{session}_task-{task}_bold.json",
        }

    found_sidecars = set(
        ss_dir.joinpath("fmap").glob(f"*ses-{session}_dir*json")  # ignores _acq- (fmap)
    ) | set(ss_dir.joinpath("func").glob("*json"))

    if not expected_sidecars == found_sidecars:
        print(f"⛔ sub-{subject}: layout does not match expectation, investigate manually")
        return False

    if not all(p.is_file() for p in expected_sidecars):
        print(f"⛔ sub-{subject}: (some) sidecars are not present, did you run datalad get?")
        return False

    print(f"🟢 sub-{subject}: layout matches expectation")
    return True


def load_shim(p: Path) -> npt.NDArray:
    with p.open() as fp:
        d = json.load(fp)
    return np.array(d["ShimSetting"])


def shims_match(paths: list[Path]) -> np.bool:
    shims = np.array([load_shim(p) for p in paths])
    return np.all(shims == shims[0])


def all_shims_match(ss_dir: Path) -> bool:
    """Check if shims match between all intended sets of images

    Each functional acquisition should have a matching pair in fmap,
    and shims for the three should match (we don't need shims to stay
    constant for the session, as long as each fmap pair matches the
    corresponding func acquisition).

    This is done with the conviction that if there is a mismatch for
    one of the runs, we better leave things as heudiconv left them and
    inspect manually.

    """
    subject, session = ss_labels(ss_dir)
    for run, task in ((1, "rest"), (2, "restvidn"), (3, "restvida")):
        paths = [
            ss_dir / "fmap" / f"sub-{subject}_ses-{session}_dir-ap_run-{run}_epi.json",
            ss_dir / "fmap" / f"sub-{subject}_ses-{session}_dir-pa_run-{run}_epi.json",
            ss_dir / "func" / f"sub-{subject}_ses-{session}_task-{task}_bold.json",
        ]
        if not shims_match(paths):
            print(f"🟡 not all shims match, won't force assignment")
            return False
    return True


def ensure_intended_for(sidecar: Path, targets: list[str]) -> None:
    """Ensure the given value of IntendedFor

    Will not write to the file if the given value is already there.

    """
    with sidecar.open() as jp:
        j = json.load(jp)

    if j.get("IntendedFor") == targets:
        print(f"〰 {sidecar.name}: nothing to do")
    else:
        print(f"🔨 {sidecar.name}: updating IntendedFor (preset)")
        j.update({"IntendedFor": targets})
        save_pretty_json(sidecar, j)


def ensure_not_intended_for_dwi(sidecar: Path) -> None:
    """Remove DWI from IntendedFor

    We know that DWI has its own field map, and the field maps for
    func should not be used (but heudiconv could have assigned
    them). If we overwrite IntendedFor with a preset value we enforce
    that automatically. This function is for the cases when we don't
    do that.

    Will not edit the file if the requirement is already met.

    """
    with sidecar.open() as jp:
        j = json.load(jp)

    filtered = [x for x in j.get("IntendedFor", []) if re.search("dwi/", x) is None]
    if len(filtered) < len(j.get("IntendedFor", [])):
        print(f"🔧 {sidecar.name}: updating IntendedFor (not DWI)")
        j.update({"IntendedFor": filtered})
        save_pretty_json(sidecar, j)


def fixup_session(ss_dir: Path) -> None:
    """Take a session and apply preset IntendedFor, if possible.

    If the layout matches our expectations, we try to write preset
    values into IntendedFor. If we can't do that because shims don't
    match, we only remove DWI from the field maps intended for func.

    """

    # skip if layout is surprising or not all expected JSONs are available
    if not(is_model_dir(ss_dir)):
        return

    # check if shims match for each intended fmap-func pairing
    # if they don't, we better keep the assignment from Heudiconv of fix by hand
    as_intended = all_shims_match(ss_dir)

    subject, session = ss_labels(ss_dir)
    for run, task in ((1, "rest"), (2, "restvidn"), (3, "restvida")):
        target = f"ses-{session}/func/sub-{subject}_ses-{session}_task-{task}_bold.nii.gz"
        for dir in ("ap", "pa"):
            sidecar = ss_dir / "fmap" / f"sub-{subject}_ses-{session}_dir-{dir}_run-{run}_epi.json"
            if as_intended:
                # all runs match, so we can set each exactly as intended
                ensure_intended_for(sidecar, [target])
            else:
                # keep Heudiconv's assignment, only remove dwi
                ensure_not_intended_for_dwi(sidecar)


parser = argparse.ArgumentParser()
parser.add_argument("bids_root", type=Path)
parser.add_argument("--subject", nargs="*", help="Subject label(s)")
parser.add_argument("--session", nargs="*", help="Session label(s)")
args = parser.parse_args()

subjects = set(args.subject) if args.subject is not None else None
sessions = set(args.session) if args.session is not None else None

# glob the BIDS root and restrict to subjects / sessions from CLI
dirs: list[Path] = []
for p in sorted(args.bids_root.glob("sub-*/ses-*")):
    subject, session = ss_labels(p)
    if (subjects is None or subject in subjects) and (sessions is None or session in sessions ):
        dirs.append(p)

# fix up the sidecars for each subject-session
for session_dir in dirs:
    fixup_session(session_dir)