"""Categorize PASS/FAIL lines across the 14 local VASLOG .DAT samples. Goal: understand whether plain-decimal vs E-notation correlates with file (model), date, or random distribution. """ import os, re DAT_DIR = r'D:\claudetools\projects\dataforth-dos\datasheet-pipeline\scmvas-hvas-research\samples\vaslog-dat' RE_PASS_SCI = re.compile(r'"(PASS|FAIL)\s*(-?\d+\.?\d*E[+-]?\d{2})(\d?)"', re.I) RE_PASS_PLAIN = re.compile(r'"(PASS|FAIL)\s*(-?\.?\d+\.?\d*)"', re.I) RE_SNDATE = re.compile(r'^"([^"]+)","(\d{2}-\d{2}-\d{4})"') for fn in sorted(os.listdir(DAT_DIR)): path = os.path.join(DAT_DIR, fn) with open(path, 'r', encoding='latin-1') as f: lines = [l.strip() for l in f if l.strip()] sci = 0 plain = 0 other = 0 dates = [] model = None for line in lines: if line.startswith('"') and not line.startswith('"PASS') and not line.startswith('"FAIL') and ',' not in line and '0' not in line[1:3]: if not model: model = line.replace('"','').strip() m_snd = RE_SNDATE.match(line) if m_snd: dates.append(m_snd.group(2)) continue # Only interested in lines that contain a PASS/FAIL status field (not the SN line) if '"PASS' in line or '"FAIL' in line: m_sci = RE_PASS_SCI.search(line) m_plain = RE_PASS_PLAIN.search(line) if m_sci: sci += 1 elif m_plain: plain += 1 else: other += 1 # Sort dates by year dates_sorted = sorted(dates) date_range = f'{dates_sorted[0]} .. {dates_sorted[-1]}' if dates_sorted else '-' total = sci + plain + other print(f'{fn:20s} model={model!r:18s} total={total:4d} sci={sci:4d} plain={plain:4d} other={other:4d} dates={date_range}')