Java滤波去除异常峰值方法(二)

发布于:2025-08-05 ⋅ 阅读:(14) ⋅ 点赞:(0)

1. 基于滑动窗口的中值绝对偏差(MAD)方法

/**
 * Replaces outliers with the local median, using a sliding-window Median Absolute
 * Deviation (MAD) test.
 *
 * <p>For each index {@code i} the window covers up to {@code windowSize / 2} samples on either
 * side of {@code i}, clipped to the array bounds. The sample is replaced by the window median
 * when its distance from that median is greater than {@code threshold * MAD}. All statistics are
 * taken from the original {@code data}, so an earlier replacement never affects a later window.
 *
 * <p>When the MAD of a window is exactly zero, the sample at that index is left unchanged.
 *
 * @param data       input samples; the array is not modified
 * @param windowSize nominal window width; must not be negative (0 and 1 both give a
 *                   single-sample window, which never triggers a replacement)
 * @param threshold  multiple of the MAD beyond which a sample is treated as an outlier
 * @return a new array of the same length as {@code data} with outliers replaced
 * @throws IllegalArgumentException if {@code windowSize} is negative
 */
public static double[] removeContinuousOutliersMAD(double[] data, int windowSize, double threshold) {
    if (windowSize < 0) {
        throw new IllegalArgumentException("windowSize must not be negative: " + windowSize);
    }

    double[] filtered = Arrays.copyOf(data, data.length);
    int halfWindow = windowSize / 2;

    for (int i = 0; i < data.length; i++) {
        // Clip the window to the array; written this way so i + halfWindow cannot overflow.
        int from = i - Math.min(halfWindow, i);
        int to = i + Math.min(halfWindow, data.length - 1 - i);

        List<Double> window = new ArrayList<>(to - from + 1);
        for (int j = from; j <= to; j++) {
            window.add(data[j]);
        }
        double median = getMedian(window);

        // MAD = median of the absolute deviations from the window median.
        List<Double> deviations = new ArrayList<>(window.size());
        for (double value : window) {
            deviations.add(Math.abs(value - median));
        }
        double mad = getMedian(deviations);

        // A zero MAD gives no usable scale, so such windows are skipped.
        if (mad != 0 && Math.abs(data[i] - median) > threshold * mad) {
            filtered[i] = median;
        }
    }

    return filtered;
}

/**
 * Returns the median of the given values without modifying the list.
 *
 * <p>For an even number of values the result is the mean of the two middle values.
 *
 * @param list values to inspect; must not be empty
 * @return the median value
 * @throws IllegalArgumentException if {@code list} is empty
 */
private static double getMedian(List<Double> list) {
    if (list.isEmpty()) {
        throw new IllegalArgumentException("cannot take the median of an empty list");
    }

    // Sort a copy so the caller's list keeps its original order.
    List<Double> sorted = new ArrayList<>(list);
    Collections.sort(sorted);

    int mid = sorted.size() / 2;
    if (sorted.size() % 2 == 1) {
        return sorted.get(mid);
    }
    return (sorted.get(mid - 1) + sorted.get(mid)) / 2.0;
}

2. 基于连续异常值计数的剔除方法

/**
 * Replaces samples that belong to a long run of consecutive outliers.
 *
 * <p>A sample is an outlier when it lies more than {@code threshold} standard deviations from
 * the mean of the whole series. Within each run of consecutive outliers the first
 * {@code maxConsecutive} samples are kept as they are; every further sample of the run is
 * replaced by a value derived from the nearest non-outlier neighbours.
 *
 * <p>NOTE(review): keeping the first {@code maxConsecutive} samples of a run and replacing only
 * the rest is the behaviour of the original code; confirm that this, rather than replacing
 * short runs, is what callers want.
 *
 * @param data           input samples; the array is not modified
 * @param threshold      number of standard deviations beyond which a sample is an outlier
 * @param maxConsecutive number of leading samples in each outlier run that are left untouched
 * @return a new array of the same length as {@code data}
 */
public static double[] removeContinuousOutliers(double[] data, double threshold, int maxConsecutive) {
    double[] filtered = Arrays.copyOf(data, data.length);

    // Series statistics are loop-invariant, so compute them once.
    double mean = calculateMean(data);
    double limit = threshold * calculateStdDev(data, mean);

    int consecutiveCount = 0;
    for (int i = 0; i < data.length; i++) {
        if (Math.abs(data[i] - mean) > limit) {
            consecutiveCount++;
            if (consecutiveCount > maxConsecutive) {
                filtered[i] = findReplacementValue(data, i, mean, limit);
            }
        } else {
            consecutiveCount = 0;
        }
    }

    return filtered;
}

/**
 * Finds a replacement for {@code data[index]}, treating a neighbour as usable when it lies
 * within one standard deviation of the series mean.
 *
 * @param data  input samples
 * @param index position of the sample to replace
 * @return the replacement value
 */
private static double findReplacementValue(double[] data, int index) {
    double mean = calculateMean(data);
    return findReplacementValue(data, index, mean, calculateStdDev(data, mean));
}

/**
 * Finds a replacement for {@code data[index]} from the nearest neighbours whose distance from
 * {@code mean} does not exceed {@code limit}.
 *
 * <p>If such a neighbour exists on both sides the result is their average; if only one side
 * has one, that neighbour is returned; if neither side has one, {@code mean} is returned.
 *
 * @param data  input samples
 * @param index position of the sample to replace
 * @param mean  mean of the series
 * @param limit largest absolute deviation from {@code mean} that still counts as normal
 * @return the replacement value
 */
private static double findReplacementValue(double[] data, int index, double mean, double limit) {
    // Walk backwards to the closest non-outlier.
    boolean hasPrev = false;
    double prev = 0;
    for (int i = index - 1; i >= 0; i--) {
        if (Math.abs(data[i] - mean) <= limit) {
            prev = data[i];
            hasPrev = true;
            break;
        }
    }

    // Walk forwards to the closest non-outlier.
    boolean hasNext = false;
    double next = 0;
    for (int i = index + 1; i < data.length; i++) {
        if (Math.abs(data[i] - mean) <= limit) {
            next = data[i];
            hasNext = true;
            break;
        }
    }

    if (hasPrev && hasNext) {
        return (prev + next) / 2.0;
    }
    if (hasPrev) {
        return prev;
    }
    if (hasNext) {
        return next;
    }
    return mean;
}

3. 使用指数加权移动平均(EWMA)检测连续异常

/** Run length used by the three-argument overload. */
private static final int DEFAULT_MIN_ANOMALY_STREAK = 3;

/**
 * Replaces sustained anomalies with the exponentially weighted moving average (EWMA), using a
 * required run length of three consecutive anomalous samples.
 *
 * @param data      input samples; the array is not modified
 * @param lambda    weight given to the newest sample when updating the EWMA
 * @param threshold residual above which a sample counts as anomalous
 * @return a new array of the same length as {@code data}
 * @see #detectContinuousAnomaliesEWMA(double[], double, double, int)
 */
public static double[] detectContinuousAnomaliesEWMA(double[] data, double lambda, double threshold) {
    return detectContinuousAnomaliesEWMA(data, lambda, threshold, DEFAULT_MIN_ANOMALY_STREAK);
}

/**
 * Replaces sustained anomalies with the exponentially weighted moving average (EWMA).
 *
 * <p>The EWMA starts at {@code data[0]} and is updated with every later sample as
 * {@code lambda * data[i] + (1 - lambda) * ewma}. The residual is the absolute difference
 * between the sample and the EWMA <em>after</em> that update, i.e. {@code (1 - lambda)} times
 * the distance to the previous EWMA. Once {@code minStreak} or more consecutive samples have a
 * residual above {@code threshold}, each such sample is replaced by the current EWMA. The first
 * sample is never replaced.
 *
 * @param data      input samples; the array is not modified
 * @param lambda    weight given to the newest sample when updating the EWMA
 * @param threshold residual above which a sample counts as anomalous
 * @param minStreak number of consecutive anomalous samples required before replacing
 * @return a new array of the same length as {@code data}; empty if {@code data} is empty
 */
public static double[] detectContinuousAnomaliesEWMA(double[] data, double lambda, double threshold,
                                                     int minStreak) {
    double[] filtered = Arrays.copyOf(data, data.length);
    if (data.length == 0) {
        // Nothing to seed the EWMA with.
        return filtered;
    }

    double ewma = data[0];
    int anomalyStreak = 0;

    for (int i = 1; i < data.length; i++) {
        ewma = lambda * data[i] + (1 - lambda) * ewma;
        double residual = Math.abs(data[i] - ewma);

        if (residual > threshold) {
            anomalyStreak++;
            if (anomalyStreak >= minStreak) {
                filtered[i] = ewma;
            }
        } else {
            anomalyStreak = 0;
        }
    }

    return filtered;
}

4. 基于变化率的连续异常检测

/**
 * Smooths samples that sit inside a run of abnormally large point-to-point changes.
 *
 * <p>The change rate between neighbours is {@code |data[i+1] - data[i]|}. A change is abnormal
 * when it exceeds {@code mean + rateThreshold * stdDev} of all change rates. An interior sample
 * is flagged when both the change leading into it and the change leading out of it are abnormal;
 * from the second consecutive flagged sample onwards, the sample is replaced by the average of
 * its two original neighbours. The first and last samples are never replaced.
 *
 * @param data          input samples; the array is not modified
 * @param rateThreshold number of standard deviations above the mean change rate at which a
 *                      change counts as abnormal
 * @return a new array of the same length as {@code data}; arrays with fewer than three samples
 *         are returned as an unchanged copy
 */
public static double[] removeContinuousSpikes(double[] data, double rateThreshold) {
    double[] filtered = Arrays.copyOf(data, data.length);
    if (data.length < 3) {
        // No interior sample to examine; also avoids a negative array size for empty input.
        return filtered;
    }

    // rates[i] is the absolute change from data[i] to data[i + 1].
    double[] rates = new double[data.length - 1];
    for (int i = 0; i < rates.length; i++) {
        rates[i] = Math.abs(data[i + 1] - data[i]);
    }

    double rateMean = calculateMean(rates);
    double rateStd = calculateStdDev(rates, rateMean);
    double rateLimit = rateMean + rateThreshold * rateStd;

    int spikeLength = 0;
    for (int i = 1; i < data.length - 1; i++) {
        // rates[i - 1] is the change into sample i, rates[i] the change out of it.
        if (rates[i - 1] > rateLimit && rates[i] > rateLimit) {
            spikeLength++;
            if (spikeLength >= 2) {
                filtered[i] = (data[i - 1] + data[i + 1]) / 2.0;
            }
        } else {
            spikeLength = 0;
        }
    }

    return filtered;
}

辅助方法

/**
 * Computes the arithmetic mean of the given samples.
 *
 * @param data samples to average
 * @return the sum of all samples divided by their count; {@code NaN} for an empty array
 */
private static double calculateMean(double[] data) {
    double total = 0;
    for (int i = 0; i < data.length; i++) {
        total += data[i];
    }
    return total / data.length;
}

/**
 * Computes the population standard deviation of the given samples around a supplied mean.
 *
 * @param data samples to measure
 * @param mean value the deviations are taken from
 * @return the square root of the mean squared deviation (divisor is the sample count)
 */
private static double calculateStdDev(double[] data, double mean) {
    double squaredSum = 0;
    for (int i = 0; i < data.length; i++) {
        squaredSum += Math.pow(data[i] - mean, 2);
    }
    double variance = squaredSum / data.length;
    return Math.sqrt(variance);
}

测试:

/**
 * Runs every filter in this file on one sample series and prints the original series followed
 * by each filtered result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    double[] data = {10, 10.1, 10.2, 50, 55, 52, 10.3, 10.2, 10.1, 60, 65, 10};

    // Method 1: sliding-window MAD
    double[] result1 = removeContinuousOutliersMAD(data, 5, 3.0);

    // Method 2: consecutive-outlier counting
    double[] result2 = removeContinuousOutliers(data, 2.5, 2);

    // Method 3: EWMA
    double[] result3 = detectContinuousAnomaliesEWMA(data, 0.2, 3.0);

    // Method 4: change rate (previously missing from this demo)
    double[] result4 = removeContinuousSpikes(data, 1.0);

    System.out.println("原始数据: " + Arrays.toString(data));
    System.out.println("MAD方法: " + Arrays.toString(result1));
    System.out.println("连续计数方法: " + Arrays.toString(result2));
    System.out.println("EWMA方法: " + Arrays.toString(result3));
    System.out.println("变化率方法: " + Arrays.toString(result4));
}

方法选择建议

  1. MAD方法:对非正态分布数据更鲁棒,适合数据分布未知的情况

  2. 连续计数方法:适合已知异常值最大连续长度的情况

  3. EWMA方法:适合时间序列数据,对缓慢变化的异常更敏感

  4. 变化率方法:适合检测数据中突然的连续跳跃

对于特别长的连续异常,可能需要结合领域知识或更复杂的算法,如基于机器学习的方法。


网站公告

今日签到

点亮在社区的每一天
去签到