# rdmtools/trr379_rdmtools/fix_dwi_sdc.py

"""Fix up incorrect naming of DWI files

The initial heuristic incorrectly placed series named
"dwi_acq-(...)b0ref" as _sbref in the dwi folder instead of _epi in
the fmap folder. These series are intended to be used for field map
correction of the actual dwi images because they have opposite phase
encoding directions.

This is a Python reimplementation of the correction proposed by
richard0nkrumah.

Here, we expect to run within a datalad run call, so OS operations
(via Path) are used to rename or remove files (subsequent datalad save
should set the record straight). We use heudiconv's function for
saving pretty JSON to minimize changes in the files we touch.

"""

import argparse
import json
from pathlib import Path
import re


def fixup_session(ss_dir: Path) -> None:
    """Correct the DWI file naming inside one subject-session directory.

    For each of the acquisitions "b1200" and "mshell":

    - ``dwi/*_acq-<acq>_dwi.*`` files gain a ``dir-PA`` entity;
    - ``dwi/*_acq-<acq>_sbref.*`` files are deleted (``.bval``,
      ``.bvec``) or moved to ``fmap/`` as ``*_acq-<acq>_dir-AP_epi.*``
      (``.gz``, ``.json``). The moved JSON sidecar gets its
      ``IntendedFor`` set to the renamed dwi image. Files with any other
      suffix are left alone.

    Every renamed ``.gz`` file is also renamed in the session's scans
    file.

    Parameters
    ----------
    ss_dir
        Session directory, i.e. ``<bids_root>/sub-<label>/ses-<label>``.
    """
    subject, session = ss_labels(ss_dir)
    prefix = f"sub-{subject}_ses-{session}"
    fmap_dir = ss_dir / "fmap"

    # old -> new paths (relative to the session dir) for the scans file
    repls: dict[Path, Path] = {}

    for acq in ("b1200", "mshell"):
        # Listings are materialised with sorted() so that we never rename
        # entries of a directory that is still being scanned, and so that
        # the processing order is deterministic.

        # dwi files with no dir-<label> in name (should have been dir-PA)
        for p in sorted(ss_dir.glob(f"dwi/{prefix}_acq-{acq}_dwi.*")):
            new_p = p.rename(p.with_stem(p.stem.replace("_dwi", "_dir-PA_dwi")))
            if p.suffix == ".gz":
                # .nii.gz also goes to scans file
                repls[p.relative_to(ss_dir)] = new_p.relative_to(ss_dir)

        # sbref (should have been in fmap/ with dir-AP and _epi suffix)
        for p in sorted(ss_dir.glob(f"dwi/{prefix}_acq-{acq}_sbref.*")):
            if p.suffix in (".bval", ".bvec"):
                p.unlink()
            elif p.suffix in (".gz", ".json"):
                # The session may not have an fmap/ folder yet; without it
                # the rename would fail and leave the session half-fixed.
                fmap_dir.mkdir(exist_ok=True)
                new_p = p.rename(fmap_dir / p.name.replace("_sbref", "_dir-AP_epi"))
                if p.suffix == ".gz":
                    repls[p.relative_to(ss_dir)] = new_p.relative_to(ss_dir)
                else:
                    ensure_intended_for(
                        new_p,
                        [f"ses-{session}/dwi/{prefix}_acq-{acq}_dir-PA_dwi.nii.gz"],
                    )

    if repls:
        edit_scans_file(ss_dir / f"{prefix}_scans.tsv", repls)


def ss_labels(ss_dir: Path) -> tuple[str, str]:
    """Return the ``(subject, session)`` labels for a session directory.

    Each label is the text after the last "-" in a directory name: the
    parent directory's name gives the subject label, the directory's own
    name gives the session label. A name without "-" is returned whole.
    """
    subject_dirname, session_dirname = ss_dir.parent.name, ss_dir.name
    return (
        subject_dirname.rpartition("-")[2],
        session_dirname.rpartition("-")[2],
    )


def edit_scans_file(scans_tsv: Path, repls: dict[Path, Path]) -> None:
    """Apply path replacements to the text of a scans file.

    Every occurrence of an old path is replaced with its new path. The
    file is only written when the replacements actually changed its
    content.

    Parameters
    ----------
    scans_tsv
        Path of the scans file to edit in place.
    repls
        Mapping of old path -> new path, as they appear in the file.

    Notes
    -----
    A missing file or a permission error on writing is reported with a
    printed message; neither raises.
    """
    try:
        txt = scans_tsv.read_text()
    except FileNotFoundError:
        print(f"{scans_tsv} file is missing, did you get it?")
        return

    new_txt = txt
    for old, new in repls.items():
        # The scans file is expected to list paths with forward slashes
        # regardless of platform, so compare against the POSIX form
        # rather than str(), which uses the OS-specific separator.
        new_txt = new_txt.replace(old.as_posix(), new.as_posix())

    if new_txt == txt:
        # nothing to change: leave the file untouched
        return

    try:
        scans_tsv.write_text(new_txt)
    except PermissionError:
        print(f"{scans_tsv} permission error, did you unlock it?")


def ensure_intended_for(sidecar: Path, targets: list[str]) -> None:
    """Make sure a JSON sidecar's ``IntendedFor`` equals ``targets``.

    The sidecar is rewritten only when its current ``IntendedFor`` value
    differs from ``targets``. A missing sidecar, or a permission error
    on writing, is reported with a printed message and does not raise.
    """
    try:
        content = json.loads(sidecar.read_text())
    except FileNotFoundError:
        print(f"{sidecar.name} file is missing, did you get it?")
        return

    # Already as requested: report it and leave the file alone.
    if content.get("IntendedFor") == targets:
        print(f"〰 {sidecar.name}: nothing to do")
        return

    print(f"🔨 {sidecar.name}: updating IntendedFor (preset)")
    content["IntendedFor"] = targets
    try:
        save_pretty_json(sidecar, content)
    except PermissionError:
        print(f"{sidecar.name} permission error, did you unlock it?")


def save_pretty_json(filename: Path, data: dict) -> None:
    """Write ``data`` to ``filename`` as JSON formatted by ``json_dumps_pretty``.

    The text is fully produced before the file is opened for writing, so
    a formatting failure does not truncate an existing file.
    """
    filename.write_text(json_dumps_pretty(data))


def json_dumps_pretty(j: dict, indent: int = 2, sort_keys: bool = True) -> str:
    """Given a json structure, pretty print it by collapsing numeric arrays
    into a line.

    The structure is first dumped with ``json.dumps`` and the resulting
    text is then tuned with regular expressions. Both texts are parsed
    back; if the parsed values differ, ``AssertionError`` is raised.

    Copied & minimally changed from heudiconv 1.4.0 (utils.py).
    https://github.com/nipy/heudiconv/

    Copyright HeuDiConv developers; licensed under the Apache License,
    Version 2.0. http://www.apache.org/licenses/LICENSE-2.0

    """
    js = json.dumps(j, indent=indent, sort_keys=sort_keys)
    # trim away \n and spaces between entries of numbers
    js_ = re.sub(
        '[\n ]+("?[-+.0-9e]+"?,?) *\n(?= *"?[-+.0-9e]+"?)',
        r" \1",
        js,
        flags=re.MULTILINE,
    )
    # uniform no spaces before ]
    js_ = re.sub(r" *\]", "]", js_)
    # uniform spacing before numbers
    # But that thing could screw up dates within strings which would have 2 spaces
    # in a date like Mar  3 2017, so we do negative lookahead to avoid changing
    # in those cases
    js_ = re.sub(
        r"(?<!\w{3})"  # negative lookbehind for the month
        r'  *("?[-+.0-9e]+"?)'
        r"(?! [123]\d{3})"  # negative lookahead for a year
        r"(?P<space> ?)[ \n]*",
        r" \1\g<space>",
        js_,
    )
    # no spaces after [
    js_ = re.sub(r"\[ ", "[", js_)
    # the load from the original dump and reload from tuned up
    # version should result in identical values since no value
    # must be changed, just formatting.
    j_just_reloaded = json.loads(js)
    j_tuned = json.loads(js_)

    # Raised explicitly rather than via an `assert` statement so that the
    # check is not stripped when Python runs with -O.
    if j_just_reloaded != j_tuned:
        raise AssertionError(
            "Values differed when they should have not. "
            "Report to the heudiconv developers"
        )

    return js_


def main(argv: "list[str] | None" = None) -> None:
    """Command-line entry point: fix up the selected subject-sessions.

    Globs ``sub-*/ses-*`` under the given BIDS root, keeps the
    directories whose labels match ``--subject`` / ``--session`` (an
    omitted option means no restriction), and runs ``fixup_session`` on
    each of them in sorted order.

    Parameters
    ----------
    argv
        Argument list to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("bids_root", type=Path)
    parser.add_argument("--subject", nargs="*", help="Subject label(s)")
    parser.add_argument("--session", nargs="*", help="Session label(s)")
    args = parser.parse_args(argv)

    subjects = set(args.subject) if args.subject is not None else None
    sessions = set(args.session) if args.session is not None else None

    # glob the BIDS root and restrict to subjects / sessions from CLI
    dirs: list[Path] = []
    for p in sorted(args.bids_root.glob("sub-*/ses-*")):
        subject, session = ss_labels(p)
        if subjects is not None and subject not in subjects:
            continue
        if sessions is not None and session not in sessions:
            continue
        dirs.append(p)

    # fix up the files for each subject-session
    for session_dir in dirs:
        fixup_session(session_dir)


# Only parse arguments and touch files when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()