# Source code for ldc_bpcsad.io.rttm

# Copyright (c) 2023, Trustees of the University of Pennsylvania
# See LICENSE for licensing conditions
"""Functions for reading/writing RTTM files."""
from typing import Iterable, List

from ..segment import Segment

__all__ = ['load_rttm_file', 'write_rttm_file']


def load_rttm_file(fpath):
    """Load speech segments from Rich Transcription Time Marked (RTTM) file.

    **NOTE** that this will load **ALL** segments in the file, regardless of
    recording, channel, or speaker.

    Blank (empty or whitespace-only) lines are skipped.

    Parameters
    ----------
    fpath : pathlib.Path
        Path to file in RTTM file format.

    Returns
    -------
    List[Segment]
        Speech segments, in the order they appear in the file.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than five whitespace-delimited fields.
    ValueError
        If the onset or duration field of a line cannot be parsed as a float.

    References
    ----------
    .. [1] NIST. (2009). "The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan."
       `[link] <https://web.archive.org/web/20100606092041if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf>`_
    """
    segs = []
    with open(fpath, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (e.g., trailing newline at end of file); there
                # is nothing to parse, so skip it instead of failing on
                # ``fields[3]``.
                continue
            # Fields 4 and 5 (0-indexed 3 and 4) hold the onset and duration.
            onset = float(fields[3])
            dur = float(fields[4])
            segs.append(Segment(onset, onset + dur))
    return segs


def write_rttm_file(rttm_path, segs, file_id, channel=1, is_sorted=False,
                    precision=2):
    """Write speech segments to Rich Transcription Time Marked (RTTM) file.

    Each segment is written as a single ``SPEAKER`` line whose speaker name is
    the literal string ``speaker``.

    Parameters
    ----------
    rttm_path : pathlib.Path
        Path to file in RTTM format.

    segs : Iterable[Segment]
        Speech segments.

    file_id : str
        File ID to output with segment. Typically, basename of the audio file
        the segment is on.

    channel : int, optional
        Channel segment is on in audio file (1 indexed).
        (Default: 1)

    is_sorted : bool, optional
        If True, treat `segs` as already sorted. Otherwise, sort before
        writing.
        (Default: False)

    precision : int, optional
        Onsets and offsets are rounded to `precision` decimal places before
        the duration is computed, and both onset and duration are written
        with exactly `precision` decimal places.
        (Default: 2)

    Raises
    ------
    ValueError
        If `channel` is not an integer >= 1. Booleans are rejected even
        though ``bool`` is a subclass of ``int``.

    References
    ----------
    .. [1] NIST. (2009). "The 2009 (RT-09) Rich Transcription Meeting
       Recognition Evaluation Plan."
       `[link] <https://web.archive.org/web/20100606092041if_/http://www.itl.nist.gov/iad/mig/tests/rt/2009/docs/rt09-meeting-eval-plan-v2.pdf>`_
    """
    # ``isinstance(True, int)`` is True, so bools must be excluded explicitly;
    # otherwise ``channel=True`` would be written verbatim as "True".
    if (isinstance(channel, bool) or not isinstance(channel, int)
            or channel < 1):
        raise ValueError('Channel must be an integer >= 1.')

    if not is_sorted:
        segs = sorted(segs)

    with open(rttm_path, 'w', encoding='utf-8') as f:
        for seg in segs:
            # Round the endpoints BEFORE taking the difference so that the
            # written onset + duration equals the rounded offset.
            onset = round(seg.onset, precision)
            offset = round(seg.offset, precision)
            dur = offset - onset
            f.write(f'SPEAKER {file_id} {channel} {onset:.{precision}f} '
                    f'{dur:.{precision}f} <NA> <NA> speaker <NA> <NA>\n')