In the first implementation of join-association we took the original
association instance, removed the original key (by default, "object"),
and inserted the subject of the original association under the "subject"
key.
However, what we produce is essentially an inverted association, which
(even if not used in a schema-compliant way) retains all the semantics
of the association class. So the subject of the initial association
(e.g. Project -> associated with -> Person) becomes the object of the
inverted association (Person -> inverse associated with -> Project).
Therefore, it seems more logical to keep using the original key
("object" by default).
84 lines
2.5 KiB
Python
84 lines
2.5 KiB
Python
from collections import defaultdict
|
|
from collections.abc import Iterable
|
|
import json
|
|
|
|
import click
|
|
|
|
|
|
def create_map(
    items: Iterable[dict], prop: str, key: str, inline: bool, pop: bool
) -> dict:
    """Build a lookup of inverted associations grouped by their target.

    For every association found under ``prop`` in each item, a copy of the
    association is made without its ``"schema_type"`` and ``key`` entries,
    and the originating item (or only its ``"pid"``) is stored under
    ``key`` instead. The copies are grouped by the value the original
    association held under ``key``.

    Args:
        items: Records to scan for associations.
        prop: Name of the record property holding the associations.
        key: Association key whose value identifies the target record.
        inline: If true, store the whole originating record under ``key``;
            otherwise store only its ``"pid"``.
        pop: If true, remove ``prop`` from each record while scanning.

    Returns:
        A ``defaultdict(list)`` mapping each target value to the list of
        inverted associations pointing at it. Associations that lack
        ``key`` are grouped under ``None``.

    Raises:
        KeyError: If ``inline`` is false and a record that has associations
            does not contain ``"pid"``.
    """
    # Keys that must not be carried over into the inverted association.
    excluded = {"schema_type", key}

    inverted = defaultdict(list)
    for item in items:
        associations = item.pop(prop, None) if pop else item.get(prop)
        # A missing property and an explicit JSON null both mean
        # "no associations"; iterating None would raise TypeError.
        for assoc in associations or ():
            reverse_assoc = {k: v for k, v in assoc.items() if k not in excluded}
            reverse_assoc[key] = item if inline else item["pid"]
            inverted[assoc.get(key)].append(reverse_assoc)
    return inverted
|
|
|
|
|
|
@click.command()
@click.argument("this", type=click.File("rb"))
@click.argument("other", type=click.File("rb"))
@click.argument("property")
@click.argument("output", type=click.File("wt"))
@click.option(
    "--inline",
    is_flag=True,
    help="Inline the record while joining; only adds PID otherwise.",
)
@click.option(
    "--pop", is_flag=True, help="Remove the association property when inlining."
)
@click.option(
    "--field-name",
    help=(
        "Name of the added property, uses 'inverse_' + original property name "
        "if not specified."
    ),
)
def join(this, other, property, output, inline, pop, field_name):
    """Perform a join-like operation using association classes

    Processes records in THIS and adds records from OTHER which are
    associated with records in THIS by the given PROPERTY.

    THIS, OTHER, and OUTPUT can be JSON lines files or stdin (-). For
    PROPERTY use 'prop[::key]' format (e.g. associated_with::object);
    without the :: part the default key is "object".

    For example, consider Person and Project records, where
    Project.associated_with.object -> Person. When processing Person
    records, this link can be "inverted" and added to Person records
    by running this filter:

        join-association.py Persons.jsonl Projects.jsonl associated_with

    """
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes live in comments instead.
    #
    # this / other: binary file handles with one JSON record per line.
    # property:     'prop[::key]' specification of the association to invert
    #               (the name mirrors the CLI argument, hence the shadowing).
    # output:       text file handle receiving one JSON record per line.
    # inline / pop / field_name: see the option help texts above.

    # split prop::key (default key is "object")
    prop, _, key = property.partition("::")
    key = key or "object"

    # name of the added field
    if field_name is None:
        field_name = "inverse_" + prop

    # OTHER is read completely first; blank lines (e.g. a trailing empty
    # line) are skipped because json.loads would fail on them.
    assoc_map = create_map(
        items=(json.loads(line) for line in other if line.strip()),
        prop=prop,
        key=key,
        inline=inline,
        pop=pop,
    )

    # THIS is streamed: each record is extended and written out immediately.
    for line in this:
        if not line.strip():
            continue
        obj = json.loads(line)
        # .get() avoids inserting an empty list into the map for every
        # record that has no inverse associations.
        obj[field_name] = assoc_map.get(obj["pid"], [])
        click.echo(json.dumps(obj), output)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    join()
|