"""Parser framework with auto-discovery of format-specific parsers."""from__future__importannotationsimportimportlibimportpkgutilimportwarningsfromabcimportABC,abstractmethodfrompathlibimportPathfromecgdatakit.modelsimportECGRecord,_UNIT_ALIASES
class Parser(ABC):
    """Abstract interface implemented by every format-specific ECG parser.

    Concrete subclasses describe their format through the three class
    attributes below and implement :meth:`can_parse` and :meth:`parse`.
    """

    # Short, human-readable format name (empty on the base class).
    FORMAT_NAME: str = ""
    # One-line description of the format (empty on the base class).
    FORMAT_DESCRIPTION: str = ""
    # Typical file extensions for the format (empty on the base class).
    FILE_EXTENSIONS: list[str] = []

    @staticmethod
    @abstractmethod
    def can_parse(file_path: Path, header: bytes) -> bool:
        """Report whether this parser recognises the given file.

        Parameters
        ----------
        file_path : Path
            Location of the ECG file being examined.
        header : bytes
            Leading bytes of the file (the first 4096), supplied so the
            format can be sniffed.

        Returns
        -------
        bool
            ``True`` when this parser handles the file.
        """

    @abstractmethod
    def parse(self, file_path: Path) -> ECGRecord:
        """Read *file_path* and return its contents as an ``ECGRecord``."""
class FileParser:
    """Auto-discovers parsers and dispatches files to the right one.

    On construction every module of the ``ecgdatakit.parsing.parsers``
    package is imported and each :class:`Parser` subclass found there is
    registered. :meth:`parse` then offers a file to the registered parsers
    in discovery order and uses the first one whose ``can_parse`` accepts it.
    """

    # Package whose modules are scanned for Parser subclasses.
    _PARSER_PACKAGE = "ecgdatakit.parsing.parsers"
    # Number of leading bytes handed to ``Parser.can_parse`` for sniffing.
    _HEADER_SIZE = 4096

    def __init__(self) -> None:
        """Create the registry and populate it with the discovered parsers."""
        self._parsers: list[type[Parser]] = []
        self._discover_parsers()

    @staticmethod
    def _iter_parser_classes() -> list[type[Parser]]:
        """Import every parser module and collect its Parser subclasses.

        Returns the subclasses in discovery order. A class that is visible
        in several modules (for example because one module imports it from
        another) is reported only once.
        """
        package = importlib.import_module(FileParser._PARSER_PACKAGE)
        seen: set[type] = set()
        found: list[type[Parser]] = []
        for _, name, _ in pkgutil.iter_modules(package.__path__):
            module = importlib.import_module(
                f"{FileParser._PARSER_PACKAGE}.{name}"
            )
            for attr in vars(module).values():
                if (
                    isinstance(attr, type)
                    and issubclass(attr, Parser)
                    and attr is not Parser
                    and attr not in seen
                ):
                    seen.add(attr)
                    found.append(attr)
        return found

    def _discover_parsers(self) -> None:
        """Find all Parser subclasses in the parsers package and register them.

        Classes that are already registered are skipped, so calling this
        more than once does not create duplicate entries.
        """
        for parser_cls in self._iter_parser_classes():
            if parser_cls not in self._parsers:
                self._parsers.append(parser_cls)

    @property
    def parsers(self) -> list[type[Parser]]:
        """List of discovered :class:`Parser` subclasses (a fresh copy)."""
        return list(self._parsers)

    @staticmethod
    def supported_formats() -> list[dict[str, str | list[str]]]:
        """Return a description of every supported ECG format.

        Can be called without instantiation::

            FileParser.supported_formats()

        Each entry contains:

        - ``name`` – ``FORMAT_NAME`` of the parser, or its class name when
          ``FORMAT_NAME`` is empty
        - ``description`` – ``FORMAT_DESCRIPTION``, or the stripped class
          docstring when ``FORMAT_DESCRIPTION`` is empty
        - ``extensions`` – a copy of the parser's ``FILE_EXTENSIONS``
        """
        return [
            {
                "name": p.FORMAT_NAME or p.__name__,
                "description": p.FORMAT_DESCRIPTION or (p.__doc__ or "").strip(),
                "extensions": list(p.FILE_EXTENSIONS),
            }
            for p in FileParser._iter_parser_classes()
        ]

    def parse(
        self,
        file_path: str | Path,
        auto_scale: bool = True,
        units: str = "mV",
    ) -> ECGRecord:
        """Parse an ECG file, auto-detecting the format.

        Parameters
        ----------
        file_path : str | Path
            Path to the ECG file.
        auto_scale : bool
            When ``True`` (default), leads with scaling metadata are
            automatically converted to physical units (see *units*). Leads
            without sufficient metadata are left as raw ADC values and a
            warning is emitted. Set to ``False`` to always receive raw ADC
            samples; a warning is emitted in that case as well.
        units : str
            Target voltage unit when *auto_scale* is ``True``. Accepted
            values: ``"uV"`` (microvolts), ``"mV"`` (millivolts, default),
            ``"V"`` (volts). The value is validated even when *auto_scale*
            is ``False``, but is otherwise unused in that case.

        Returns
        -------
        ECGRecord
            The record produced by the first parser that accepts the file.

        Raises
        ------
        ValueError
            If no parser can handle the file or *units* is not recognised.
        FileNotFoundError
            If *file_path* does not exist.
        """
        # Validate units early, before touching the file system.
        target = _UNIT_ALIASES.get(units)
        if target is None:
            raise ValueError(
                f"Unknown unit {units!r}. "
                "Accepted values: 'uV', 'mV', 'V'."
            )

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        # Read only the sniffing window instead of loading the whole file.
        with path.open("rb") as handle:
            header = handle.read(self._HEADER_SIZE)

        for parser_cls in self._parsers:
            if not parser_cls.can_parse(path, header):
                continue
            record = parser_cls().parse(path)
            if auto_scale:
                return self._auto_scale(record, target)
            # stacklevel=2 attributes the warning to the caller of parse().
            warnings.warn(
                "auto_scale=False: leads contain raw ADC samples. "
                "Amplitudes are unitless and not in physical units (mV).",
                stacklevel=2,
            )
            return record

        raise ValueError(f"No parser found for: {path.name}")

    @staticmethod
    def _is_unscaled(signal) -> bool:
        """Tell whether *signal* carries no usable scaling metadata.

        *signal* is any object exposing ``resolution``, ``offset`` and
        ``resolution_unit``. It counts as unscaled when the resolution is
        1.0, the offset is 0.0 and no resolution unit is set.
        """
        return (
            signal.resolution == 1.0
            and signal.offset == 0.0
            and not signal.resolution_unit
        )

    @staticmethod
    def _to_target_units(signal, target: str):
        """Convert *signal* to physical values expressed in *target* units.

        The unit conversion is applied only when the physical signal's unit
        is a known alias that differs from *target*; otherwise the result
        of ``to_physical()`` is returned unchanged.
        """
        physical = signal.to_physical()
        norm = _UNIT_ALIASES.get(physical.units)
        if norm and norm != target:
            physical = physical.convert_units(target)
        return physical

    @staticmethod
    def _auto_scale(record: ECGRecord, target: str = "mV") -> ECGRecord:
        """Convert leads to physical units where scaling metadata is available.

        Leads and median beats without scaling metadata are passed through
        untouched. If any lead is passed through, a single warning naming
        those leads is emitted.

        Parameters
        ----------
        record : ECGRecord
            Parsed record with raw or partially-scaled leads.
        target : str
            Canonical target unit (``"uV"``, ``"mV"``, or ``"V"``).

        Returns
        -------
        ECGRecord
            A copy of *record* with ``leads`` and ``median_beats`` replaced.
        """
        import dataclasses

        new_leads = []
        raw_labels: list[str] = []
        for lead in record.leads:
            if FileParser._is_unscaled(lead):
                raw_labels.append(lead.label)
                new_leads.append(lead)
            else:
                new_leads.append(FileParser._to_target_units(lead, target))

        new_beats = [
            beat
            if FileParser._is_unscaled(beat)
            else FileParser._to_target_units(beat, target)
            for beat in record.median_beats
        ]

        if raw_labels:
            # stacklevel=3 skips _auto_scale and parse so the warning is
            # attributed to the code that called parse().
            warnings.warn(
                f"Leads {raw_labels} contain raw ADC samples — no scaling "
                "metadata available. Pass auto_scale=False to get raw values.",
                stacklevel=3,
            )

        return dataclasses.replace(
            record,
            leads=new_leads,
            median_beats=new_beats,
        )