@article{113,
  title = {Detection of aberrant item score patterns in computerized adaptive testing: An empirical example using the CUSUM},
  journal = {Personality and Individual Differences},
  volume = {48},
  number = {8},
  year = {2010},
  pages = {921-925},
  abstract = {The scalability of individual trait scores on a computerized adaptive test (CAT) was assessed by investigating the consistency of individual item score patterns. A sample of N = 428 persons completed a personality CAT as part of a career development procedure. To detect inconsistent item score patterns, we used a cumulative sum (CUSUM) procedure. Combined information from the CUSUM, other personality measures, and interviews showed that similar estimated trait values may have different interpretations. Implications for computer-based assessment are discussed.},
  keywords = {CAT, computerized adaptive testing, CUSUM approach, person fit},
  issn = {0191-8869},
  author = {Egberink, I. J. L. and Meijer, R. R. and Veldkamp, B. P. and Schakel, L. and Smid, N. G.}
}

@article{278,
  title = {Using patterns of summed scores in paper-and-pencil tests and computer-adaptive tests to detect misfitting item score patterns},
  journal = {Journal of Educational Measurement},
  volume = {41},
  number = {2},
  year = {2004},
  pages = {119-136},
  abstract = {Two new methods have been proposed to determine unexpected sum scores on subtests (testlets), both for paper-and-pencil tests and computer-adaptive tests. A method based on a conservative bound using the hypergeometric distribution, denoted ρ, was compared with a method in which the probability for each score combination was calculated using a highest density region (HDR). Furthermore, these methods were compared with the standardized log-likelihood statistic with and without a correction for the estimated latent trait value (denoted $l_z^*$ and $l_z$, respectively). Data were simulated on the basis of the one-parameter logistic model, and both parametric and nonparametric logistic regression were used to obtain estimates of the latent trait. Results showed that it is important to take the trait level into account when comparing subtest scores. In a nonparametric item response theory (IRT) context, an adapted version of the HDR method was a powerful alternative to ρ. In a parametric IRT context, results showed that $l_z^*$ had the highest power when the data were simulated conditionally on the estimated latent trait level.},
  keywords = {Computer Assisted Testing, Item Response Theory, person fit, Test Scores},
  author = {Meijer, R. R.}
}

@article{407,
  title = {Using response times to detect aberrant responses in computerized adaptive testing},
  journal = {Psychometrika},
  volume = {68},
  number = {2},
  year = {2003},
  pages = {251-265},
  abstract = {A lognormal model for response times is used to check response times for aberrances in examinee behavior on computerized adaptive tests. Both classical procedures and Bayesian posterior predictive checks are presented. For a fixed examinee, responses and response times are independent; checks based on response times thus offer information independent of the results of checks on response patterns. Empirical examples of the use of classical and Bayesian checks for detecting two different types of aberrances in response times are presented. The detection rates for the Bayesian checks were higher than those for the classical checks, but at the cost of higher false-alarm rates.
  A guideline for the choice between the two types of checks is offered.},
  keywords = {Adaptive Testing, Behavior, Computer Assisted Testing, computerized adaptive testing, Models, person fit, Prediction, Reaction Time},
  author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.}
}

@article{277,
  title = {Outlier detection in high-stakes certification testing},
  journal = {Journal of Educational Measurement},
  volume = {39},
  number = {3},
  year = {2002},
  pages = {219-233},
  abstract = {Discusses recent developments in person-fit analysis in computerized adaptive testing (CAT). Methods from statistical process control are presented that have been proposed to classify an item score pattern as fitting or misfitting the underlying item response theory model in CAT. Most person-fit research in CAT is restricted to simulated data. In this study, empirical data from a certification test were used. Alternatives are discussed to generate norms so that bounds can be determined to classify an item score pattern as fitting or misfitting. Using bounds determined from a sample of a high-stakes certification test, the empirical analysis showed that different types of misfit can be distinguished. Further applications using statistical process control methods to detect misfitting item score patterns are discussed.},
  keywords = {Adaptive Testing, computerized adaptive testing, Educational Measurement, Goodness of Fit, Item Analysis (Statistical), Item Response Theory, person fit, Statistical Estimation, Statistical Power, Test Scores},
  author = {Meijer, R. R.}
}

@inbook{410,
  title = {Detecting person misfit in adaptive testing using statistical process control techniques},
  booktitle = {Computerized adaptive testing: Theory and practice},
  year = {2000},
  pages = {201-219},
  publisher = {Kluwer Academic},
  organization = {Kluwer Academic},
  address = {Dordrecht, The Netherlands},
  keywords = {person fit},
  author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.}
}