import numpy as np
import torch
from scipy import signal

try:
    from statsmodels.tsa.stattools import acf
except ImportError:
    # statsmodels is only used for a single ACF call; when it is not
    # installed, _autocorr_strength falls back to _sample_acf below.
    acf = None


def lempel_ziv_complexity(binary_sequence: np.ndarray) -> int:
    """Count the phrases of an incremental (LZ78-style) parse of a sequence.

    The sequence is scanned left to right. The current phrase grows one
    symbol at a time until it is a string that has not been produced
    before; it is then recorded and a new phrase starts at the next symbol.
    If the sequence ends while the current phrase is still one that was
    already seen, that trailing partial phrase is not counted.

    Symbols are compared through their ``str()`` form, so the function is
    intended for single-character symbols such as 0/1.

    Args:
        binary_sequence: One-dimensional sequence of symbols.

    Returns:
        The number of distinct phrases found (0 for an empty sequence).
    """
    symbols = [str(symbol) for symbol in binary_sequence]
    seen_phrases = set()
    phrase_start = 0
    for phrase_end in range(1, len(symbols) + 1):
        phrase = "".join(symbols[phrase_start:phrase_end])
        if phrase not in seen_phrases:
            seen_phrases.add(phrase)
            phrase_start = phrase_end
    return len(seen_phrases)


def _sample_acf(values: np.ndarray, nlags: int) -> np.ndarray:
    """Return the biased sample autocorrelation for lags ``0..nlags``."""
    centered = values - values.mean()
    # Index ``size - 1`` of the full correlation is lag 0.
    autocov = np.correlate(centered, centered, mode="full")[centered.size - 1:]
    return autocov[: nlags + 1] / autocov[0]


def _autocorr_strength(detrended: np.ndarray) -> float:
    """Return the largest absolute autocorrelation over lags 1..max_lags."""
    max_lags = min(len(detrended) // 4, 40)
    if max_lags < 1:
        return 0.0
    try:
        if acf is not None:
            acf_vals = acf(detrended, nlags=max_lags, fft=True)[1:]
        else:
            acf_vals = _sample_acf(detrended, max_lags)[1:]
        return float(np.max(np.abs(acf_vals)))
    except Exception:
        # Deliberate best effort: if the ACF cannot be computed, the series
        # is treated as having no autocorrelation.
        return 0.0


def _snr_proxy(x: np.ndarray) -> float:
    """Return var(moving average) / var(residual), or 1.0 if the residual is ~0."""
    win_size = max(3, min(len(x) // 10, 15))
    signal_est = np.convolve(x, np.ones(win_size) / win_size, mode="valid")
    # Each window average is paired with the last sample of its window.
    noise_est = x[win_size - 1:] - signal_est
    var_signal = float(np.var(signal_est))
    var_noise = float(np.var(noise_est))
    return var_signal / var_noise if var_noise > 1e-8 else 1.0


def _normalized_complexity(detrended: np.ndarray) -> float:
    """Return the LZ phrase count of the median-binarised series over its length."""
    median_val = float(np.median(detrended))
    binary_seq = (detrended > median_val).astype(np.uint8)
    complexity_score = lempel_ziv_complexity(binary_seq)
    return complexity_score / max(1, len(binary_seq))


def is_low_quality(
    series: torch.Tensor,
    autocorr_threshold: float = 0.2,
    snr_threshold: float = 0.5,
    complexity_threshold: float = 0.4,
) -> bool:
    """Return True if the series appears non-forecastable (noise-like).

    Degenerate inputs are reported as low quality straight away: fewer than
    20 samples, any NaN/inf value, or a variance below 1e-10. Otherwise the
    series is flagged only when all three indicators agree:

    - weak autocorrelation: the largest absolute autocorrelation of the
      linearly detrended series (lags 1..min(n // 4, 40)) is below
      ``autocorr_threshold``;
    - low SNR proxy: var(moving average) / var(residual) is below
      ``snr_threshold``;
    - high complexity: Lempel-Ziv phrase count of the detrended series
      binarised at its median, divided by its length, is above
      ``complexity_threshold``.

    NOTE(review): a binary sequence has at most ``2 ** k`` distinct phrases
    of length ``k``, so phrase count / length cannot exceed 0.4 once the
    series has 35 or more samples. With the default ``complexity_threshold``
    the three-indicator path can therefore only return True for series of
    20-34 samples. Confirm whether normalising by ``n / log2(n)`` was
    intended.

    Args:
        series: Tensor holding a single series; size-1 dimensions are
            squeezed away.
        autocorr_threshold: Upper bound on autocorrelation strength for the
            series to count as uncorrelated.
        snr_threshold: Upper bound on the SNR proxy for the series to count
            as noisy.
        complexity_threshold: Lower bound on normalised complexity for the
            series to count as random-like.

    Returns:
        True if the series is judged low quality, False otherwise.

    Raises:
        ValueError: If the series is still multi-dimensional after
            squeezing (and was not caught by the degenerate-input checks).
    """
    x = series.squeeze().detach().cpu().numpy()

    if x.size < 20:
        return True
    # Checked before the variance test: NaN/inf would slip past it and make
    # the detrending step fail.
    if not np.isfinite(x).all():
        return True
    if np.var(x) < 1e-10:
        return True
    if x.ndim != 1:
        raise ValueError(
            f"expected a single series, got an array of shape {x.shape} after squeeze()"
        )

    x_detrended = signal.detrend(x)

    autocorr_strength = _autocorr_strength(x_detrended)
    snr_proxy = _snr_proxy(x)
    normalized_complexity = _normalized_complexity(x_detrended)

    is_random_like = (snr_proxy < snr_threshold) and (
        normalized_complexity > complexity_threshold
    )
    is_uncorrelated = autocorr_strength < autocorr_threshold
    return bool(is_uncorrelated and is_random_like)