001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2013, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-2013, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert (for Object Refinery Limited); 034 * 035 * Changes (from 08-Nov-2001) 036 * -------------------------- 037 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG); 038 * Moved from JFreeChart to package com.jrefinery.data.* in 039 * JCommon class library (DG); 040 * 24-Jun-2002 : Removed unnecessary local variable (DG); 041 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG); 042 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG); 043 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG); 044 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 045 * release (DG); 046 * 02-Jul-2013 : Use ParamChecks (DG); 047 * 048 */ 049 050package org.jfree.data.statistics; 051 052import java.util.ArrayList; 053import java.util.Collection; 054import java.util.Collections; 055import java.util.Iterator; 056import java.util.List; 057import org.jfree.chart.util.ParamChecks; 058 059/** 060 * A utility class that provides some common statistical functions. 061 */ 062public abstract class Statistics { 063 064 /** 065 * Returns the mean of an array of numbers. This is equivalent to calling 066 * <code>calculateMean(values, true)</code>. 067 * 068 * @param values the values (<code>null</code> not permitted). 069 * 070 * @return The mean. 071 */ 072 public static double calculateMean(Number[] values) { 073 return calculateMean(values, true); 074 } 075 076 /** 077 * Returns the mean of an array of numbers. 078 * 079 * @param values the values (<code>null</code> not permitted). 080 * @param includeNullAndNaN a flag that controls whether or not 081 * <code>null</code> and <code>Double.NaN</code> values are included 082 * in the calculation (if either is present in the array, the result is 083 * {@link Double#NaN}). 084 * 085 * @return The mean. 086 * 087 * @since 1.0.3 088 */ 089 public static double calculateMean(Number[] values, 090 boolean includeNullAndNaN) { 091 092 ParamChecks.nullNotPermitted(values, "values"); 093 double sum = 0.0; 094 double current; 095 int counter = 0; 096 for (int i = 0; i < values.length; i++) { 097 // treat nulls the same as NaNs 098 if (values[i] != null) { 099 current = values[i].doubleValue(); 100 } 101 else { 102 current = Double.NaN; 103 } 104 // calculate the sum and count 105 if (includeNullAndNaN || !Double.isNaN(current)) { 106 sum = sum + current; 107 counter++; 108 } 109 } 110 double result = (sum / counter); 111 return result; 112 } 113 114 /** 115 * Returns the mean of a collection of <code>Number</code> objects. 116 * 117 * @param values the values (<code>null</code> not permitted). 118 * 119 * @return The mean. 120 */ 121 public static double calculateMean(Collection values) { 122 return calculateMean(values, true); 123 } 124 125 /** 126 * Returns the mean of a collection of <code>Number</code> objects. 127 * 128 * @param values the values (<code>null</code> not permitted). 129 * @param includeNullAndNaN a flag that controls whether or not 130 * <code>null</code> and <code>Double.NaN</code> values are included 131 * in the calculation (if either is present in the array, the result is 132 * {@link Double#NaN}). 133 * 134 * @return The mean. 135 * 136 * @since 1.0.3 137 */ 138 public static double calculateMean(Collection values, 139 boolean includeNullAndNaN) { 140 141 ParamChecks.nullNotPermitted(values, "values"); 142 int count = 0; 143 double total = 0.0; 144 Iterator iterator = values.iterator(); 145 while (iterator.hasNext()) { 146 Object object = iterator.next(); 147 if (object == null) { 148 if (includeNullAndNaN) { 149 return Double.NaN; 150 } 151 } 152 else { 153 if (object instanceof Number) { 154 Number number = (Number) object; 155 double value = number.doubleValue(); 156 if (Double.isNaN(value)) { 157 if (includeNullAndNaN) { 158 return Double.NaN; 159 } 160 } 161 else { 162 total = total + number.doubleValue(); 163 count = count + 1; 164 } 165 } 166 } 167 } 168 return total / count; 169 } 170 171 /** 172 * Calculates the median for a list of values (<code>Number</code> objects). 173 * The list of values will be copied, and the copy sorted, before 174 * calculating the median. To avoid this step (if your list of values 175 * is already sorted), use the {@link #calculateMedian(List, boolean)} 176 * method. 177 * 178 * @param values the values (<code>null</code> permitted). 179 * 180 * @return The median. 181 */ 182 public static double calculateMedian(List values) { 183 return calculateMedian(values, true); 184 } 185 186 /** 187 * Calculates the median for a list of values (<code>Number</code> objects). 188 * If <code>copyAndSort</code> is <code>false</code>, the list is assumed 189 * to be presorted in ascending order by value. 190 * 191 * @param values the values (<code>null</code> permitted). 192 * @param copyAndSort a flag that controls whether the list of values is 193 * copied and sorted. 194 * 195 * @return The median. 196 */ 197 public static double calculateMedian(List values, boolean copyAndSort) { 198 199 double result = Double.NaN; 200 if (values != null) { 201 if (copyAndSort) { 202 int itemCount = values.size(); 203 List copy = new ArrayList(itemCount); 204 for (int i = 0; i < itemCount; i++) { 205 copy.add(i, values.get(i)); 206 } 207 Collections.sort(copy); 208 values = copy; 209 } 210 int count = values.size(); 211 if (count > 0) { 212 if (count % 2 == 1) { 213 if (count > 1) { 214 Number value = (Number) values.get((count - 1) / 2); 215 result = value.doubleValue(); 216 } 217 else { 218 Number value = (Number) values.get(0); 219 result = value.doubleValue(); 220 } 221 } 222 else { 223 Number value1 = (Number) values.get(count / 2 - 1); 224 Number value2 = (Number) values.get(count / 2); 225 result = (value1.doubleValue() + value2.doubleValue()) 226 / 2.0; 227 } 228 } 229 } 230 return result; 231 } 232 233 /** 234 * Calculates the median for a sublist within a list of values 235 * (<code>Number</code> objects). 236 * 237 * @param values the values, in any order (<code>null</code> not 238 * permitted). 239 * @param start the start index. 240 * @param end the end index. 241 * 242 * @return The median. 243 */ 244 public static double calculateMedian(List values, int start, int end) { 245 return calculateMedian(values, start, end, true); 246 } 247 248 /** 249 * Calculates the median for a sublist within a list of values 250 * (<code>Number</code> objects). The entire list will be sorted if the 251 * <code>ascending</code< argument is <code>false</code>. 252 * 253 * @param values the values (<code>null</code> not permitted). 254 * @param start the start index. 255 * @param end the end index. 256 * @param copyAndSort a flag that that controls whether the list of values 257 * is copied and sorted. 258 * 259 * @return The median. 260 */ 261 public static double calculateMedian(List values, int start, int end, 262 boolean copyAndSort) { 263 264 double result = Double.NaN; 265 if (copyAndSort) { 266 List working = new ArrayList(end - start + 1); 267 for (int i = start; i <= end; i++) { 268 working.add(values.get(i)); 269 } 270 Collections.sort(working); 271 result = calculateMedian(working, false); 272 } 273 else { 274 int count = end - start + 1; 275 if (count > 0) { 276 if (count % 2 == 1) { 277 if (count > 1) { 278 Number value 279 = (Number) values.get(start + (count - 1) / 2); 280 result = value.doubleValue(); 281 } 282 else { 283 Number value = (Number) values.get(start); 284 result = value.doubleValue(); 285 } 286 } 287 else { 288 Number value1 = (Number) values.get(start + count / 2 - 1); 289 Number value2 = (Number) values.get(start + count / 2); 290 result 291 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 292 } 293 } 294 } 295 return result; 296 297 } 298 299 /** 300 * Returns the standard deviation of a set of numbers. 301 * 302 * @param data the data (<code>null</code> or zero length array not 303 * permitted). 304 * 305 * @return The standard deviation of a set of numbers. 306 */ 307 public static double getStdDev(Number[] data) { 308 ParamChecks.nullNotPermitted(data, "data"); 309 if (data.length == 0) { 310 throw new IllegalArgumentException("Zero length 'data' array."); 311 } 312 double avg = calculateMean(data); 313 double sum = 0.0; 314 315 for (int counter = 0; counter < data.length; counter++) { 316 double diff = data[counter].doubleValue() - avg; 317 sum = sum + diff * diff; 318 } 319 return Math.sqrt(sum / (data.length - 1)); 320 } 321 322 /** 323 * Fits a straight line to a set of (x, y) data, returning the slope and 324 * intercept. 325 * 326 * @param xData the x-data (<code>null</code> not permitted). 327 * @param yData the y-data (<code>null</code> not permitted). 328 * 329 * @return A double array with the intercept in [0] and the slope in [1]. 330 */ 331 public static double[] getLinearFit(Number[] xData, Number[] yData) { 332 333 ParamChecks.nullNotPermitted(xData, "xData"); 334 ParamChecks.nullNotPermitted(yData, "yData"); 335 if (xData.length != yData.length) { 336 throw new IllegalArgumentException( 337 "Statistics.getLinearFit(): array lengths must be equal."); 338 } 339 340 double[] result = new double[2]; 341 // slope 342 result[1] = getSlope(xData, yData); 343 // intercept 344 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 345 346 return result; 347 348 } 349 350 /** 351 * Finds the slope of a regression line using least squares. 352 * 353 * @param xData the x-values (<code>null</code> not permitted). 354 * @param yData the y-values (<code>null</code> not permitted). 355 * 356 * @return The slope. 357 */ 358 public static double getSlope(Number[] xData, Number[] yData) { 359 ParamChecks.nullNotPermitted(xData, "xData"); 360 ParamChecks.nullNotPermitted(yData, "yData"); 361 if (xData.length != yData.length) { 362 throw new IllegalArgumentException("Array lengths must be equal."); 363 } 364 365 // ********* stat function for linear slope ******** 366 // y = a + bx 367 // a = ybar - b * xbar 368 // sum(x * y) - (sum (x) * sum(y)) / n 369 // b = ------------------------------------ 370 // sum (x^2) - (sum(x)^2 / n 371 // ************************************************* 372 373 // sum of x, x^2, x * y, y 374 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 375 int counter; 376 for (counter = 0; counter < xData.length; counter++) { 377 sx = sx + xData[counter].doubleValue(); 378 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 379 sxy = sxy + yData[counter].doubleValue() 380 * xData[counter].doubleValue(); 381 sy = sy + yData[counter].doubleValue(); 382 } 383 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 384 385 } 386 387 /** 388 * Calculates the correlation between two datasets. Both arrays should 389 * contain the same number of items. Null values are treated as zero. 390 * <P> 391 * Information about the correlation calculation was obtained from: 392 * 393 * http://trochim.human.cornell.edu/kb/statcorr.htm 394 * 395 * @param data1 the first dataset. 396 * @param data2 the second dataset. 397 * 398 * @return The correlation. 399 */ 400 public static double getCorrelation(Number[] data1, Number[] data2) { 401 ParamChecks.nullNotPermitted(data1, "data1"); 402 ParamChecks.nullNotPermitted(data2, "data2"); 403 if (data1.length != data2.length) { 404 throw new IllegalArgumentException( 405 "'data1' and 'data2' arrays must have same length." 406 ); 407 } 408 int n = data1.length; 409 double sumX = 0.0; 410 double sumY = 0.0; 411 double sumX2 = 0.0; 412 double sumY2 = 0.0; 413 double sumXY = 0.0; 414 for (int i = 0; i < n; i++) { 415 double x = 0.0; 416 if (data1[i] != null) { 417 x = data1[i].doubleValue(); 418 } 419 double y = 0.0; 420 if (data2[i] != null) { 421 y = data2[i].doubleValue(); 422 } 423 sumX = sumX + x; 424 sumY = sumY + y; 425 sumXY = sumXY + (x * y); 426 sumX2 = sumX2 + (x * x); 427 sumY2 = sumY2 + (y * y); 428 } 429 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 430 * (n * sumY2 - sumY * sumY), 0.5); 431 } 432 433 /** 434 * Returns a data set for a moving average on the data set passed in. 435 * 436 * @param xData an array of the x data. 437 * @param yData an array of the y data. 438 * @param period the number of data points to average 439 * 440 * @return A double[][] the length of the data set in the first dimension, 441 * with two doubles for x and y in the second dimension 442 */ 443 public static double[][] getMovingAverage(Number[] xData, Number[] yData, 444 int period) { 445 446 // check arguments... 447 if (xData.length != yData.length) { 448 throw new IllegalArgumentException("Array lengths must be equal."); 449 } 450 451 if (period > xData.length) { 452 throw new IllegalArgumentException( 453 "Period can't be longer than dataset."); 454 } 455 456 double[][] result = new double[xData.length - period][2]; 457 for (int i = 0; i < result.length; i++) { 458 result[i][0] = xData[i + period].doubleValue(); 459 // holds the moving average sum 460 double sum = 0.0; 461 for (int j = 0; j < period; j++) { 462 sum += yData[i + j].doubleValue(); 463 } 464 sum = sum / period; 465 result[i][1] = sum; 466 } 467 return result; 468 469 } 470 471}