@article {2730, title = {A Blocked-CAT Procedure for CD-CAT}, journal = {Applied Psychological Measurement}, volume = {44}, number = {1}, year = {2020}, pages = {49-64}, abstract = {This article introduces a blocked-design procedure for cognitive diagnosis computerized adaptive testing (CD-CAT), which allows examinees to review items and change their answers during test administration. Four blocking versions of the new procedure were proposed. In addition, the impact of several factors, namely, item quality, generating model, block size, and test length, on the classification rates was investigated. Three popular item selection indices in CD-CAT were used and their efficiency compared using the new procedure. An additional study was carried out to examine the potential benefit of item review. The results showed that the new procedure is promising in that allowing item review resulted only in a small loss in attribute classification accuracy under some conditions. Moreover, using a blocked-design CD-CAT is beneficial to the extent that it alleviates the negative impact of test anxiety on examinees{\textquoteright} true performance.}, doi = {10.1177/0146621619835500}, url = {https://doi.org/10.1177/0146621619835500}, author = {Mehmet Kaplan and Jimmy de la Torre} } @article {2718, title = {Three Measures of Test Adaptation Based on Optimal Test Information}, journal = {Journal of Computerized Adaptive Testing}, volume = {8}, year = {2020}, pages = {1-19}, issn = {2165-6592}, doi = {10.7333/2002-0801001}, url = {http://iacat.org/jcat/index.php/jcat/article/view/80/37}, author = {G. Gage Kingsbury and Steven L. Wise} } @article {2694, title = {Adaptive Testing With a Hierarchical Item Response Theory Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {1}, year = {2019}, pages = {51-67}, abstract = {The hierarchical item response theory (H-IRT) model is very flexible and allows a general factor and subfactors within an overall structure of two or more levels. When an H-IRT model with a large number of dimensions is used for an adaptive test, the computational burden associated with interim scoring and selection of subsequent items is heavy. An alternative approach for any high-dimension adaptive test is to reduce dimensionality for interim scoring and item selection and then revert to full dimensionality for final score reporting, thereby significantly reducing the computational burden. This study compared the accuracy and efficiency of final scoring for multidimensional, local multidimensional, and unidimensional item selection and interim scoring methods, using both simulated and real item pools. The simulation study was conducted under 10 conditions (i.e., five test lengths and two H-IRT models) with a simulated sample of 10,000 students. The study with the real item pool was conducted using item parameters from an actual 45-item adaptive test with a simulated sample of 10,000 students.
Results indicate that the theta estimations provided by the local multidimensional and unidimensional item selection and interim scoring methods were relatively as accurate as the theta estimation provided by the multidimensional item selection and interim scoring method, especially during the real item pool study. In addition, the multidimensional method required the longest computation time and the unidimensional method required the shortest computation time.}, doi = {10.1177/0146621618765714}, url = {https://doi.org/10.1177/0146621618765714}, author = {Wenhao Wang and Neal Kingston} } @article {2702, title = {How Adaptive Is an Adaptive Test: Are All Adaptive Tests Adaptive?}, journal = {Journal of Computerized Adaptive Testing}, volume = {7}, year = {2019}, pages = {1-14}, keywords = {computerized adaptive test, multistage test, statistical indicators of amount of adaptation}, doi = {10.7333/1902-0701001}, url = {http://iacat.org/jcat/index.php/jcat/article/view/69/34}, author = {Mark Reckase and Unhee Ju and Sewon Kim} } @article {2673, title = {Adaptive Item Selection Under Matroid Constraints}, journal = {Journal of Computerized Adaptive Testing}, volume = {6}, year = {2018}, pages = {15-36}, doi = {10.7333/1808-0602015}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/64/32}, author = {Daniel Bengs and Ulf Brefeld and Ulf Kr{\"o}hne} } @article {2612, title = {Measuring patient-reported outcomes adaptively: Multidimensionality matters!}, journal = {Applied Psychological Measurement}, year = {2018}, doi = {10.1177/0146621617733954}, author = {Paap, Muirne C. S. and Kroeze, Karel A. and Glas, C. A. W. and Terwee, C. B. and van der Palen, Job and Veldkamp, Bernard P.} } @article {2696, title = {A Top-Down Approach to Designing the Computerized Adaptive Multistage Test}, journal = {Journal of Educational Measurement}, volume = {55}, number = {2}, year = {2018}, pages = {243-263}, abstract = {Abstract The top-down approach to designing a multistage test is relatively understudied in the literature and underused in research and practice. This study introduced a route-based top-down design approach that directly sets design parameters at the test level and utilizes the advanced automated test assembly algorithm seeking global optimality. The design process in this approach consists of five sub-processes: (1) route mapping, (2) setting objectives, (3) setting constraints, (4) routing error control, and (5) test assembly. Results from a simulation study confirmed that the assembly, measurement and routing results of the top-down design eclipsed those of the bottom-up design. Additionally, the top-down design approach provided unique insights into design decisions that could be used to refine the test. Regardless of these advantages, it is recommended applying both top-down and bottom-up approaches in a complementary manner in practice.}, doi = {10.1111/jedm.12174}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12174}, author = {Luo, Xiao and Kim, Doyoung} } @article {2597, title = {The Development of MST Test Information for the Prediction of Test Performances}, journal = {Educational and Psychological Measurement}, volume = {77}, number = {4}, year = {2017}, pages = {570-586}, abstract = {The current study proposes novel methods to predict multistage testing (MST) performance without conducting simulations. This method, called MST test information, is based on analytic derivation of standard errors of ability estimates across theta levels. 
We compared standard errors derived analytically to the simulation results to demonstrate the validity of the proposed method in both measurement precision and classification accuracy. The results indicate that the MST test information effectively predicted the performance of MST. In addition, the results of the current study highlighted the relationship among the test construction, MST design factors, and MST performance.}, doi = {10.1177/0013164416662960}, url = {http://dx.doi.org/10.1177/0013164416662960}, author = {Ryoungsun Park and Jiseon Kim and Hyewon Chung and Barbara G. Dodd} } @article {2580, title = {Dual-Objective Item Selection Criteria in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {54}, number = {2}, year = {2017}, pages = {165{\textendash}183}, abstract = {The development of cognitive diagnostic-computerized adaptive testing (CD-CAT) has provided a new perspective for gaining information about examinees{\textquoteright} mastery on a set of cognitive attributes. This study proposes a new item selection method within the framework of dual-objective CD-CAT that simultaneously addresses examinees{\textquoteright} attribute mastery status and overall test performance. The new procedure is based on the Jensen-Shannon (JS) divergence, a symmetrized version of the Kullback-Leibler divergence. We show that the JS divergence resolves the noncomparability problem of the dual information index and has close relationships with Shannon entropy, mutual information, and Fisher information. The performance of the JS divergence is evaluated in simulation studies in comparison with the methods available in the literature. Results suggest that the JS divergence achieves parallel or more precise recovery of latent trait variables compared to the existing methods and maintains practical advantages in computation and item pool usage.}, issn = {1745-3984}, doi = {10.1111/jedm.12139}, url = {http://dx.doi.org/10.1111/jedm.12139}, author = {Kang, Hyeon-Ah and Zhang, Susu and Chang, Hua-Hua} } @conference {2663, title = {From Blueprints to Systems: An Integrated Approach to Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

For years, test blueprints have told test developers how many items and what types of items will be included in a test. Adaptive testing adopted this approach from paper testing, and it is reasonably useful. Unfortunately, {\textquoteleft}how many items and what types of items{\textquoteright} are not all the elements one should consider when choosing items for an adaptive test. To fill in gaps, practitioners have developed tools to allow an adaptive test to behave appropriately (i.e. examining exposure control, content balancing, item drift procedures, etc.). Each of these tools involves the use of a separate process external to the primary item selection process.

The use of these subsidiary processes makes item selection less optimal and makes it difficult to prioritize aspects of selection. This discussion describes systems-based adaptive testing. This approach uses metadata concerning items, test takers and test elements to select items. These elements are weighted by the stakeholders to shape an expanded blueprint designed for adaptive testing.\ 


}, keywords = {CAT, integrated approach, Keynote}, url = {https://drive.google.com/open?id=1CBaAfH4ES7XivmvrMjPeKyFCsFZOpQMJ}, author = {Gage Kingsbury and Tony Zara} } @article {2614, title = {Item usage in a multidimensional computerized adaptive test (MCAT) measuring health-related quality of life}, journal = {Quality of Life Research}, volume = {26}, number = {11}, year = {2017}, pages = {2909{\textendash}2918}, issn = {1573-2649}, doi = {10.1007/s11136-017-1624-3}, url = {https://doi.org/10.1007/s11136-017-1624-3}, author = {Paap, Muirne C. S. and Kroeze, Karel A. and Terwee, Caroline B. and van der Palen, Job and Veldkamp, Bernard P.} } @conference {2665, title = {Multi-stage Testing for a Multi-disciplined End-of primary-school Test }, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The Dutch secondary education system consists of five levels: basic, lower, and middle vocational education, general secondary education, and pre-academic education. The individual decision for level of secondary education is based on a combination of the teacher{\textquoteright}s judgment and an end-of-primary-school placement test.

This placement test encompasses the measurement of reading, language, mathematics and writing; each skill consisting of one to four subdomains. The Dutch end-of-primary-school test is currently administered in two linear 200-item paper-based versions. The two versions differ in difficulty so as to motivate both less able and more able students, and measure both groups of students precisely. The primary goal of the test is to provide placement advice for the five levels of secondary education. The secondary goal is the assessment of six different fundamental reference levels defined on reading, language, and mathematics. Because of the high-stakes advice provided by the test, the Dutch parliament has mandated a change of format to a multistage test. A major advantage of multistage testing is that the tailoring of the tests is more strongly related to the ability of the students than to the teacher{\textquoteright}s judgment. A separate multistage test is under development for each of the three skills measured by the reference levels to increase the classification accuracy for secondary education placement and to optimally measure the performance on the reference-level-related skills.

This symposium consists of three presentations discussing the challenges in transitioning from a linear paper-based test to a computer-based multistage test within an existing curriculum and the specification of the multistage test to meet the measurement purposes. The transition to a multistage test has to improve both classification accuracy and measurement precision.

First, we describe the Dutch educational system and the role of the end-of-primary-school placement test within this system. Special attention will be paid to the advantages of multistage testing over both linear testing and computerized adaptive testing, and to practical implications related to the transition from a linear to a multistage test.

Second, we discuss routing and reporting on the new multi-stage test. Both topics have a major impact on the quality of the placement advice and the reference mastery decisions. Several methods for routing and reporting are compared.

Third, the linear test contains 200 items to cover a broad range of different skills and to obtain a precise measurement of those skills separately. Multistage testing creates opportunities to reduce the cognitive burden for the students while maintaining the same quality of placement advice and assessment of mastery of the reference levels. This presentation focuses on the optimal allocation of items to test modules, the optimal number of stages and modules per stage, and test length reduction.


}, keywords = {mst, Multidisciplined, proficiency}, url = {https://drive.google.com/open?id=1C5ys178p_Wl9eemQuIsI56IxDTck2z8P}, author = {Hendrik Straat and Maaike van Groen and Wobbe Zijlstra and Marie-Anne Keizer-Mittelha{\"e}user and Michel Lamor{\'e}} } @conference {2638, title = {A New Cognitive Diagnostic Computerized Adaptive Testing for Simultaneously Diagnosing Skills and Misconceptions}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In educational diagnosis, diagnosing misconceptions is as important as diagnosing skills. However, traditional cognitive diagnostic computerized adaptive testing (CD-CAT) is usually developed to diagnose skills only. This study aims to propose a new CD-CAT that can simultaneously diagnose skills and misconceptions. The proposed CD-CAT is based on a recently published CDM, called the simultaneously identifying skills and misconceptions (SISM) model (Kuo, Chen, \& de la Torre, in press). A new item selection algorithm is also proposed within the CD-CAT to achieve high adaptive testing performance. In simulation studies, we compare our new item selection algorithm with three existing item selection methods, including the Kullback{\textendash}Leibler (KL) and posterior-weighted KL (PWKL) methods proposed by Cheng (2009) and the modified PWKL (MPWKL) proposed by Kaplan, de la Torre, and Barrada (2015). The results show that our proposed CD-CAT can efficiently diagnose skills and misconceptions; the accuracy of our new item selection algorithm is close to that of the MPWKL but with less computational burden; and our new item selection algorithm outperforms the KL and PWKL methods in diagnosing skills and misconceptions.

References

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74(4), 619{\textendash}632. doi: 10.1007/s11336-009-9123-2

Kaplan, M., de la Torre, J., \& Barrada, J. R. (2015). New item selection methods for cognitive diagnosis computerized adaptive testing. Applied Psychological Measurement, 39(3), 167{\textendash}188. doi:10.1177/0146621614554650

Kuo, B.-C., Chen, C.-H., \& de la Torre, J. (in press). A cognitive diagnosis model for identifying coexisting skills and misconceptions. Applied Psychological Measurement.


}, keywords = {CD-CAT, Misconceptions, Simultaneous diagnosis}, author = {Bor-Chen Kuo and Chun-Hua Chen} } @article {2607, title = {Projection-Based Stopping Rules for Computerized Adaptive Testing in Licensure Testing}, journal = {Applied Psychological Measurement}, volume = {42}, year = {2017}, month = {2018/06/01}, pages = {275-290}, abstract = {The confidence interval (CI) stopping rule is commonly used in licensure settings to make classification decisions with fewer items in computerized adaptive testing (CAT). However, it tends to be less efficient in the near-cut regions of the theta scale, as the CI often fails to be narrow enough for an early termination decision prior to reaching the maximum test length. To solve this problem, this study proposed the projection-based stopping rules that base the termination decisions on the algorithmically projected range of the final theta estimate at the hypothetical completion of the CAT. A simulation study and an empirical study were conducted to show the advantages of the projection-based rules over the CI rule, in which the projection-based rules reduced the test length without jeopardizing critical psychometric qualities of the test, such as the theta and classification precision. Operationally, these rules do not require additional regularization parameters, because the projection is simply a hypothetical extension of the current test within the existing CAT environment. Because these new rules are specifically designed to address the decreased efficiency in the near-cut regions as opposed to for the entire scale, the authors recommend using them in conjunction with the CI rule in practice.}, isbn = {0146-6216}, url = {https://doi.org/10.1177/0146621617726790}, author = {Luo, Xiao and Kim, Doyoung and Dickison, Philip} } @article {2616, title = {The validation of a computer-adaptive test (CAT) for assessing health-related quality of life in children and adolescents in a clinical sample: study design, methods and first results of the Kids-CAT study}, journal = {Quality of Life Research}, volume = {26}, number = {5}, year = {2017}, month = {May}, pages = {1105{\textendash}1117}, abstract = {Recently, we developed a computer-adaptive test (CAT) for assessing health-related quality of life (HRQoL) in children and adolescents: the Kids-CAT. It measures five generic HRQoL dimensions. The aims of this article were (1) to present the study design and (2) to investigate its psychometric properties in a clinical setting.}, issn = {1573-2649}, doi = {10.1007/s11136-016-1437-9}, url = {https://doi.org/10.1007/s11136-016-1437-9}, author = {Barthel, D. and Otto, C. and Nolte, S. and Meyrose, A.-K. and Fischer, F. and Devine, J. and Walter, O. and Mierke, A. and Fischer, K. I. and Thyen, U. and Klein, M. and Ankermann, T. and Rose, M.
and Ravens-Sieberer, U.} } @article {2491, title = {Effect of Imprecise Parameter Estimation on Ability Estimation in a Multistage Test in an Automatic Item Generation Context }, journal = {Journal of Computerized Adaptive Testing}, volume = {4}, year = {2016}, pages = {1-18}, keywords = {Adaptive Testing, automatic item generation, errors in item parameters, item clones, multistage testing}, issn = {2165-6592 }, doi = {10.7333/1608-040101}, url = {http://iacat.org/jcat/index.php/jcat/article/view/59/27}, author = {Colvin, Kimberly and Keller, Lisa A and Robin, Frederic} } @article {2463, title = {Modeling Student Test-Taking Motivation in the Context of an Adaptive Achievement Test}, journal = {Journal of Educational Measurement}, volume = {53}, number = {1}, year = {2016}, pages = {86{\textendash}105}, abstract = {This study examined the utility of response time-based analyses in understanding the behavior of unmotivated test takers. For the data from an adaptive achievement test, patterns of observed rapid-guessing behavior and item response accuracy were compared to the behavior expected under several types of models that have been proposed to represent unmotivated test taking behavior. Test taker behavior was found to be inconsistent with these models, with the exception of the effort-moderated model. Effort-moderated scoring was found to both yield scores that were more accurate than those found under traditional scoring, and exhibit improved person fit statistics. In addition, an effort-guided adaptive test was proposed and shown by a simulation study to alleviate item difficulty mistargeting caused by unmotivated test taking.}, issn = {1745-3984}, doi = {10.1111/jedm.12102}, url = {http://dx.doi.org/10.1111/jedm.12102}, author = {Wise, Steven L. and Kingsbury, G. Gage} } @article {2505, title = {Parameter Drift Detection in Multidimensional Computerized Adaptive Testing Based on Informational Distance/Divergence Measures}, journal = {Applied Psychological Measurement}, volume = {40}, number = {7}, year = {2016}, pages = {534-550}, abstract = {An informational distance/divergence-based approach is proposed to detect the presence of parameter drift in multidimensional computerized adaptive testing (MCAT). The study presents significance testing procedures for identifying changes in multidimensional item response functions (MIRFs) over time based on informational distance/divergence measures that capture the discrepancy between two probability functions. To approximate the MIRFs from the observed response data, the k-nearest neighbors algorithm is used with the random search method. A simulation study suggests that the distance/divergence-based drift measures perform effectively in identifying the instances of parameter drift in MCAT. They showed moderate power with small samples of 500 examinees and excellent power when the sample size was as large as 1,000. 
The proposed drift measures also adequately controlled for Type I error at the nominal level under the null hypothesis.}, doi = {10.1177/0146621616663676}, url = {http://apm.sagepub.com/content/40/7/534.abstract}, author = {Kang, Hyeon-Ah and Chang, Hua-Hua} } @article {2459, title = {Stochastic Curtailment of Questionnaires for Three-Level Classification: Shortening the CES-D for Assessing Low, Moderate, and High Risk of Depression}, journal = {Applied Psychological Measurement}, volume = {40}, number = {1}, year = {2016}, pages = {22-36}, abstract = {In clinical assessment, efficient screeners are needed to ensure low respondent burden. In this article, Stochastic Curtailment (SC), a method for efficient computerized testing for classification into two classes for observable outcomes, was extended to three classes. In a post hoc simulation study using the item scores on the Center for Epidemiologic Studies{\textendash}Depression Scale (CES-D) of a large sample, three versions of SC, SC via Empirical Proportions (SC-EP), SC via Simple Ordinal Regression (SC-SOR), and SC via Multiple Ordinal Regression (SC-MOR) were compared at both respondent burden and classification accuracy. All methods were applied under the regular item order of the CES-D and under an ordering that was optimal in terms of the predictive power of the items. Under the regular item ordering, the three methods were equally accurate, but SC-SOR and SC-MOR needed less items. Under the optimal ordering, additional gains in efficiency were found, but SC-MOR suffered from capitalization on chance substantially. It was concluded that SC-SOR is an efficient and accurate method for clinical screening. Strengths and weaknesses of the methods are discussed.}, doi = {10.1177/0146621615592294}, url = {http://apm.sagepub.com/content/40/1/22.abstract}, author = {Smits, Niels and Finkelman, Matthew D. and Kelderman, Henk} } @article {2384, title = {Assessing Individual-Level Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {52}, number = {1}, year = {2015}, pages = {80{\textendash}105}, abstract = {With an increase in the number of online tests, the number of interruptions during testing due to unexpected technical issues seems to be on the rise. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees{\textquoteright} scores. Researchers such as Hill and Sinharay et~al. examined the impact of interruptions at an aggregate level. However, there is a lack of research on the assessment of impact of interruptions at an individual level. We attempt to fill that void. We suggest four methodological approaches, primarily based on statistical hypothesis testing, linear regression, and item response theory, which can provide evidence on the individual-level impact of interruptions. We perform a realistic simulation study to compare the Type~I error rate and power of the suggested approaches. We then apply the approaches to data from the 2013 Indiana Statewide Testing for Educational Progress-Plus~(ISTEP+) test that experienced interruptions.}, issn = {1745-3984}, doi = {10.1111/jedm.12064}, url = {http://dx.doi.org/10.1111/jedm.12064}, author = {Sinharay, Sandip and Wan, Ping and Choi, Seung W. 
and Kim, Dong-In} } @article {2383, title = {A Comparison of IRT Proficiency Estimation Methods Under Adaptive Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {52}, number = {1}, year = {2015}, pages = {70{\textendash}79}, abstract = {This inquiry is an investigation of item response theory (IRT) proficiency estimators{\textquoteright} accuracy under multistage testing (MST). We chose a two-stage MST design that includes four modules (one at Stage 1, three at Stage 2) and three difficulty paths (low, middle, high). We assembled various two-stage MST panels (i.e., forms) by manipulating two assembly conditions in each module, such as difficulty level and module length. For each panel, we investigated the accuracy of examinees{\textquoteright} proficiency levels derived from seven IRT proficiency estimators. The choice of Bayesian (prior) versus non-Bayesian (no prior) estimators was of more practical significance than the choice of number-correct versus item-pattern scoring estimators. The Bayesian estimators were slightly more efficient than the non-Bayesian estimators, resulting in smaller overall error. Possible score changes caused by the use of different proficiency estimators would be nonnegligible, particularly for low- and high-performing examinees.}, issn = {1745-3984}, doi = {10.1111/jedm.12063}, url = {http://dx.doi.org/10.1111/jedm.12063}, author = {Kim, Sooyeon and Moses, Tim and Yoo, Hanwook (Henry)} } @article {2484, title = {Evaluating Content Alignment in Computerized Adaptive Testing}, journal = {Educational Measurement: Issues and Practice}, volume = {34}, number = {41-48}, year = {2015}, abstract = {The alignment between a test and the content domain it measures represents key evidence for the validation of test score inferences. Although procedures have been developed for evaluating the content alignment of linear tests, these procedures are not readily applicable to computerized adaptive tests (CATs), which require large item pools and do not use fixed test forms. This article describes the decisions made in the development of CATs that influence and might threaten content alignment. It outlines a process for evaluating alignment that is sensitive to these threats and gives an empirical example of the process.}, doi = {http://dx.doi.org/10.1111/emip.12094}, author = {Wise, S. L. and Kingsbury, G. G. and Webb, N. L.} } @article {2457, title = {Investigation of Response Changes in the GRE Revised General Test}, journal = {Educational and Psychological Measurement}, volume = {75}, number = {6}, year = {2015}, pages = {1002-1020}, abstract = {Research on examinees{\textquoteright} response changes on multiple-choice tests over the past 80 years has yielded some consistent findings, including that most examinees make score gains by changing answers. This study expands the research on response changes by focusing on a high-stakes admissions test{\textemdash}the Verbal Reasoning and Quantitative Reasoning measures of the GRE revised General Test. We analyzed data from 8,538 examinees for Quantitative and 9,140 for Verbal sections who took the GRE revised General Test in 12 countries. The analyses yielded findings consistent with prior research. In addition, as examinees{\textquoteright} ability increases, the benefit of response changing increases. The study yielded significant implications for both test agencies and test takers. Computer adaptive tests often do not allow the test takers to review and revise. 
Findings from this study confirm the benefit of such features.}, doi = {10.1177/0013164415573988}, url = {http://epm.sagepub.com/content/75/6/1002.abstract}, author = {Liu, Ou Lydia and Bridgeman, Brent and Gu, Lixiong and Xu, Jun and Kong, Nan} } @article {2393, title = {New Item Selection Methods for Cognitive Diagnosis Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {3}, year = {2015}, pages = {167-188}, abstract = {This article introduces two new item selection methods, the modified posterior-weighted Kullback{\textendash}Leibler index (MPWKL) and the generalized deterministic inputs, noisy {\textquotedblleft}and{\textquotedblright} gate (G-DINA) model discrimination index (GDI), that can be used in cognitive diagnosis computerized adaptive testing. The efficiency of the new methods is compared with the posterior-weighted Kullback{\textendash}Leibler (PWKL) item selection index using a simulation study in the context of the G-DINA model. The impact of item quality, generating models, and test termination rules on attribute classification accuracy or test length is also investigated. The results of the study show that the MPWKL and GDI perform very similarly, and have higher correct attribute classification rates or shorter mean test lengths compared with the PWKL. In addition, the GDI has the shortest implementation time among the three indices. The proportion of item usage with respect to the required attributes across the different conditions is also tracked and discussed.}, doi = {10.1177/0146621614554650}, url = {http://apm.sagepub.com/content/39/3/167.abstract}, author = {Kaplan, Mehmet and de la Torre, Jimmy and Barrada, Juan Ram{\'o}n} } @article {2351, title = {Cognitive Diagnostic Models and Computerized Adaptive Testing: Two New Item-Selection Methods That Incorporate Response Times}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, pages = {59-76}, doi = {10.7333/1412-0204059}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/43/21}, author = {Finkelman, M. D. and Kim, W. and Weissman, A. and Cook, R.J.} } @article {2350, title = {Determining the Overall Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {419{\textendash}440}, abstract = {

With an increase in the number of online tests, interruptions during testing due to unexpected technical issues seem unavoidable. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees{\textquoteright} scores. There is a lack of research on this topic due to the novelty of the problem. This article is an attempt to fill that void. Several methods, primarily based on propensity score matching, linear regression, and item response theory, were suggested to determine the overall impact of the interruptions on the examinees{\textquoteright} scores. A realistic simulation study shows that the suggested methods have satisfactory Type I error rate and power. Then the methods were applied to data from the Indiana Statewide Testing for Educational Progress-Plus (ISTEP+) test that experienced interruptions in 2013. The results indicate that the interruptions did not have a significant overall impact on the student scores for the ISTEP+ test.

}, issn = {1745-3984}, doi = {10.1111/jedm.12052}, url = {http://dx.doi.org/10.1111/jedm.12052}, author = {Sinharay, Sandip and Wan, Ping and Whitaker, Mike and Kim, Dong-In and Zhang, Litong and Choi, Seung W.} } @article {2332, title = {Enhancing Pool Utilization in Constructing the Multistage Test Using Mixed-Format Tests}, journal = {Applied Psychological Measurement}, volume = {38}, number = {4}, year = {2014}, pages = {268-280}, abstract = {

This study investigated a new pool utilization method of constructing multistage tests (MST) using the mixed-format test based on the generalized partial credit model (GPCM). MST simulations of a classification test were performed to evaluate the MST design. A linear programming (LP) model was applied to perform MST reassemblies based on the initial MST construction. Three subsequent MST reassemblies were performed. For each reassembly, three test unit replacement ratios (TRRs; 0.22, 0.44, and 0.66) were investigated. The conditions of the three passing rates (30\%, 50\%, and 70\%) were also considered in the classification testing. The results demonstrated that various MST reassembly conditions increased the overall pool utilization rates, while maintaining the desired MST construction. All MST testing conditions performed equally well in terms of the precision of the classification decision.

}, doi = {10.1177/0146621613515545}, url = {http://apm.sagepub.com/content/38/4/268.abstract}, author = {Park, Ryoungsun and Kim, Jiseon and Chung, Hyewon and Dodd, Barbara G.} } @article {2341, title = {Stratified Item Selection and Exposure Control in Unidimensional Adaptive Testing in the Presence of Two-Dimensional Data}, journal = {Applied Psychological Measurement}, volume = {38}, number = {7}, year = {2014}, pages = {563-576}, abstract = {

It is not uncommon to use unidimensional item response theory models to estimate ability in multidimensional data with computerized adaptive testing (CAT). The current Monte Carlo study investigated the penalty of this model misspecification in CAT implementations using different item selection methods and exposure control strategies. Three item selection methods{\textemdash}maximum information (MAXI), a-stratification (STRA), and a-stratification with b-blocking (STRB) with and without Sympson{\textendash}Hetter (SH) exposure control strategy{\textemdash}were investigated. Calibrating multidimensional items as unidimensional items resulted in inaccurate item parameter estimates. Therefore, MAXI performed better than STRA and STRB in estimating the ability parameters. However, all three methods had relatively large standard errors. SH exposure control had no impact on the number of overexposed items. Existing unidimensional CAT implementations might consider using MAXI only if recalibration as multidimensional model is too expensive. Otherwise, building a CAT pool by calibrating multidimensional data as unidimensional is not recommended.

}, doi = {10.1177/0146621614536768}, url = {http://apm.sagepub.com/content/38/7/563.abstract}, author = {Kalinowski, Kevin E. and Natesan, Prathiba and Henson, Robin K.} } @article {2288, title = {Item Ordering in Stochastically Curtailed Health Questionnaires With an Observable Outcome}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {38-66}, doi = {10.7333/1304-0103038}, author = {Finkelman, M. D. and Kim, W. and He, Y. and Lai, A.M.} } @inbook {2273, title = {Reporting differentiated literacy results in PISA by using multidimensional adaptive testing. }, booktitle = {Research on PISA.}, year = {2013}, publisher = {Dodrecht: Springer}, organization = {Dodrecht: Springer}, author = {Frey, A. and Seitz, N-N. and Kr{\"o}hne, U.} } @article {1996, title = {Computerized Adaptive Testing for Student Selection to Higher Education}, journal = {Journal of Higher Education}, year = {2012}, chapter = {1}, abstract = {

The purpose of the present study is to discuss the applicability of the computerized adaptive testing format as an alternative to the current student selection examinations for higher education in Turkey. In the study, problems associated with the current student selection system are first described. These problems exert pressure on students that results in test anxiety, produce measurement experiences that can be criticized, and lessen the credibility of the student selection system. Next, computerized adaptive tests are introduced and the advantages they provide are presented. Then results of a study that used two research designs (simulation and live testing) are presented. Results revealed that (i) the computerized adaptive format provided a reduction of up to 80\% in the number of items given to students compared to the paper-and-pencil format of the student selection examination, and (ii) ability estimations had high reliabilities. Correlations between ability estimations obtained from the simulation and the traditional format were higher than 0.80. At the end of the study, solutions provided by a computerized adaptive testing implementation to the current problems are discussed. Some issues regarding the application of the CAT format to student selection examinations in Turkey are also given.

}, author = {Kalender, I.} } @article {2263, title = {Development of a computerized adaptive test for depression}, journal = {Archives of General Psychiatry}, volume = {69}, year = {2012}, pages = {1105-1112}, doi = {10.1001/archgenpsychiatry.2012.14}, url = {WWW.ARCHGENPSYCHIATRY.COM}, author = {Robert D. Gibbons and David .J. Weiss and Paul A. Pilkonis and Ellen Frank and Tara Moore and Jong Bae Kim and David J. Kupfer} } @article {2166, title = {An Empirical Evaluation of the Slip Correction in the Four Parameter Logistic Models With Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {36}, number = {2}, year = {2012}, pages = {75-87}, abstract = {

In a selected response test, aberrant responses such as careless errors and lucky guesses might cause error in ability estimation because these responses do not actually reflect the knowledge that examinees possess. In a computerized adaptive test (CAT), these aberrant responses could further cause serious estimation error due to dynamic item administration. To enhance the robust performance of CAT against aberrant responses, Barton and Lord proposed the four-parameter logistic (4PL) item response theory (IRT) model. However, most studies relevant to the 4PL IRT model were conducted based on simulation experiments. This study attempts to investigate the performance of the 4PL IRT model as a slip-correction mechanism with an empirical experiment. The results showed that the 4PL IRT model could not only reduce the problematic underestimation of the examinees{\textquoteright} ability introduced by careless mistakes in practical situations but also improve measurement efficiency.

}, doi = {10.1177/0146621611432862}, url = {http://apm.sagepub.com/content/36/2/75.abstract}, author = {Yen, Yung-Chin and Ho, Rong-Guey and Laio, Wen-Wei and Chen, Li-Ju and Kuo, Ching-Chin} } @article {2211, title = {Panel Design Variations in the Multistage Test Using the Mixed-Format Tests}, journal = {Educational and Psychological Measurement}, volume = {72}, number = {4}, year = {2012}, pages = {574-588}, abstract = {

This study compared various panel designs of the multistage test (MST) using mixed-format tests in the context of classification testing. Simulations varied the design of the first-stage module. The first stage was constructed according to three levels of test information functions (TIFs) with three different TIF centers. Additional computerized adaptive test (CAT) conditions provided baseline comparisons. Three passing rate conditions were also included. The various MST conditions using mixed-format tests were constructed properly and performed well. When the levels of TIFs at the first stage were higher, the simulations produced a greater number of correct classifications. CAT with the randomesque-10 procedure yielded comparable results to the MST with increased levels of TIFs. Finally, all MST conditions achieved better test security results compared with CAT{\textquoteright}s maximum information conditions.

}, doi = {10.1177/0013164411428977}, url = {http://epm.sagepub.com/content/72/4/574.abstract}, author = {Kim, Jiseon and Chung, Hyewon and Dodd, Barbara G. and Park, Ryoungsun} } @conference {2100, title = {Continuous Testing (an avenue for CAT research)}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Publishing an Adaptive Test

Problems with Publishing

Research Questions

}, keywords = {CAT, item filter, item filtration}, author = {G. Gage Kingsbury} } @article {2038, title = {Creating a K-12 Adaptive Test: Examining the Stability of Item Parameter Estimates and Measurement Scales}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

Development of adaptive tests used in K-12 settings requires the creation of stable measurement scales to measure the growth of individual students from one grade to the next, and to measure change in groups from one year to the next. Accountability systems
like No Child Left Behind require stable measurement scales so that accountability has meaning across time. This study examined the stability of the measurement scales used with the Measures of Academic Progress. Difficulty estimates for test questions from the reading and mathematics scales were examined over a period ranging from 7 to 22 years. Results showed high correlations between item difficulty estimates from the time at which they were originally calibrated and the current calibration. The average drift in item difficulty estimates was less than .01 standard deviations. The average impact of change in item difficulty estimates was less than the smallest reported difference on the score scale for two actual tests. The findings of the study indicate that an IRT scale can be stable enough to allow consistent measurement of student achievement.

}, url = {http://www.testpublishers.org/journal-of-applied-testing-technology}, author = {Kingsbury, G. G. and Wise, S. L.} } @booklet {154, title = {Cross-cultural development of an item list for computer-adaptive testing of fatigue in oncological patients}, journal = {Health and Quality of Life Outcomes}, volume = {9}, number = {1}, year = {2011}, note = {Health Qual Life Outcomes. 2011 Mar 29;9(1):19.}, month = {March 29, 2011}, pages = {10}, edition = {2011/03/31}, abstract = {ABSTRACT: INTRODUCTION: Within an ongoing project of the EORTC Quality of Life Group, we are developing computerized adaptive test (CAT) measures for the QLQ-C30 scales. These new CAT measures are conceptualised to reflect the same constructs as the QLQ-C30 scales. Accordingly, the Fatigue-CAT is intended to capture physical and general fatigue. METHODS: The EORTC approach to CAT development comprises four phases (literature search, operationalisation, pre-testing, and field testing). Phases I-III are described in detail in this paper. A literature search for fatigue items was performed in major medical databases. After refinement through several expert panels, the remaining items were used as the basis for adapting items and/or formulating new items fitting the EORTC item style. To obtain feedback from patients with cancer, these English items were translated into Danish, French, German, and Spanish and tested in the respective countries. RESULTS: Based on the literature search a list containing 588 items was generated. After a comprehensive item selection procedure focusing on content, redundancy, item clarity and item difficulty a list of 44 fatigue items was generated. Patient interviews (n=52) resulted in 12 revisions of wording and translations. DISCUSSION: The item list developed in phases I-III will be further investigated within a field-testing phase (IV) to examine psychometric characteristics and to fit an item response theory model. The Fatigue CAT based on this item bank will provide scores that are backward-compatible to the original QLQ-C30 fatigue scale.}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Giesinger, J. M. and Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Arraras, J. I. and Conroy, T. and Gamper, E. M. and Kemmler, G. and King, M. T. and Oberguggenberger, A. S. and Velikova, G. and Young, T. and Holzner, B. and Eortc-Qlg, E. O.} } @mastersthesis {1997, title = {Effects of Different Computerized Adaptive Testing Strategies of Recovery of Ability}, volume = {Ph.D.}, year = {2011}, abstract = {

The purpose of the present study is to compare ability estimations obtained from a computerized adaptive testing (CAT) procedure with the paper-and-pencil administration results of the Student Selection Examination (SSE) science subtest, considering different ability estimation methods and test termination rules. There are two phases in the present study. In the first phase, a post-hoc simulation was conducted to find out relationships between examinee ability levels estimated by CAT and paper-and-pencil versions of the SSE. Maximum Likelihood Estimation and Expected A Posteriori were used as ability estimation methods. Test termination rules were a standard error threshold and a fixed number of items. The second phase was carried out by administering a live CAT to a group of examinees to investigate the performance of CAT administration outside a simulated environment. Findings of the post-hoc simulations indicated that CAT could be implemented for the SSE by using the Expected A Posteriori estimation method with a standard error threshold of 0.30 or higher. The correlation between ability estimates obtained from CAT and the real SSE was 0.95. The mean number of items administered by CAT was 18.4. The correlation between live CAT and real SSE ability estimations was 0.74. The number of items used in the CAT administration was approximately 50\% of the items in the paper-and-pencil SSE science subtest. Results indicated that a CAT for the SSE science subtest provided ability estimations with higher reliability using fewer items than the paper-and-pencil format.

}, author = {Kalender, I.} } @conference {2078, title = {High-throughput Health Status Measurement using CAT in the Era of Personal Genomics: Opportunities and Challenges}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, keywords = {CAT, health applications, PROMIS}, author = {Eswar Krishnan} } @article {2246, title = {A new adaptive testing algorithm for shortening health literacy assessments}, journal = {BMC Medical Informatics and Decision Making}, volume = {11}, year = {2011}, abstract = {

http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3178473/?tool=pmcentrez
}, doi = {10.1186/1472-6947-11-52}, author = {Kandula, S. and Ancker, J.S. and Kaufman, D.R. and Currie, L.M. and Qing, Z.-T.} } @inbook {2064, title = {Detecting Person Misfit in Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {315-329}, chapter = {16}, doi = {10.1007/978-0-387-85461-8}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {312, title = {Development of computerized adaptive testing (CAT) for the EORTC QLQ-C30 physical functioning dimension}, journal = {Quality of Life Research }, volume = {20}, number = {4}, year = {2010}, note = {Qual Life Res. 2010 Oct 23.}, pages = {479-490}, edition = {2010/10/26}, abstract = {PURPOSE: Computerized adaptive test (CAT) methods, based on item response theory (IRT), enable a patient-reported outcome instrument to be adapted to the individual patient while maintaining direct comparability of scores. The EORTC Quality of Life Group is developing a CAT version of the widely used EORTC QLQ-C30. We present the development and psychometric validation of the item pool for the first of the scales, physical functioning (PF). METHODS: Initial developments (including literature search and patient and expert evaluations) resulted in 56 candidate items. Responses to these items were collected from 1,176 patients with cancer from Denmark, France, Germany, Italy, Taiwan, and the United Kingdom. The items were evaluated with regard to psychometric properties. RESULTS: Evaluations showed that 31 of the items could be included in a unidimensional IRT model with acceptable fit and good content coverage, although the pool may lack items at the upper extreme (good PF). There were several findings of significant differential item functioning (DIF). However, the DIF findings appeared to have little impact on the PF estimation. CONCLUSIONS: We have established an item pool for CAT measurement of PF and believe that this CAT instrument will clearly improve the EORTC measurement of PF.}, isbn = {1573-2649 (Electronic)0962-9343 (Linking)}, author = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Chie, W. C. and Conroy, T. and Costantini, A. and Fayers, P. and Helbostad, J. and Holzner, B. and Kaasa, S. and Singer, S. and Velikova, G. and Young, T.} } @article {2071, title = {Item Selection and Hypothesis Testing for the Adaptive Measurement of Change}, journal = {Applied Psychological Measurement}, volume = {34}, year = {2010}, pages = {238-254}, abstract = {

Assessing individual change is an important topic in both psychological and educational measurement. An adaptive measurement of change (AMC) method had previously been shown to exhibit greater efficiency in detecting change than conventional nonadaptive methods. However, little work had been done to compare different procedures within the AMC framework. This study introduced a new item selection criterion and two new test statistics for detecting change with AMC that were specifically designed for the paradigm of hypothesis testing. In two simulation sets, the new methods for detecting significant change improved on existing procedures by demonstrating better adherence to Type I error rates and substantially better power for detecting relatively small change.\ 

}, keywords = {change, computerized adaptive testing, individual change, Kullback{\textendash}Leibler information, likelihood ratio, measuring change}, doi = {10.1177/0146621609344844}, author = {Finkelman, M. D. and Weiss, D. J. and Kim-Kang, G.} } @inbook {1808, title = {Adaptive item calibration: A process for estimating item parameters within a computerized adaptive test}, year = {2009}, note = {{PDF File, 286 KB} {PDF File, 286 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {The characteristics of an adaptive test change the characteristics of the field testing that is necessary to add items to an existing measurement scale. The process used to add field-test items to the adaptive test might lead to scale drift or disrupt the test by administering items of inappropriate difficulty. The current study makes use of the transitivity of examinee and item in item response theory to describe a process for adaptive item calibration. In this process an item is successively administered to examinees whose ability levels match the performance of a given field-test item. By treating the item as if it were taking an adaptive test, examinees can be selected who provide the most information about the item at its momentary difficulty level. This should provide a more efficient procedure for estimating item parameters. The process is described within the context of the one-parameter logistic IRT model. The process is then simulated to identify whether it can be more accurate and efficient than random presentation of field-test items to examinees. Results indicated that adaptive item calibration might provide a viable approach to item calibration within the context of an adaptive test. It might be most useful for expanding item pools in settings with small sample sizes or needs for large numbers of items.}, author = {Kingsbury, G. G.} } @inbook {1805, title = {Adequacy of an item pool measuring proficiency in English language to implement a CAT procedure}, year = {2009}, note = {{PDF File, 160 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Karino, C. A. and Costa, D. R. and Laros, J. A.} } @inbook {1749, title = {A comparison of three methods of item selection for computerized adaptive testing}, year = {2009}, note = {PDF file, 531 K}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Costa, D. R. and Karino, C. A. and Moura, F. A. S. and Andrade, D. F.} } @inbook {1884, title = {Criterion-related validity of an innovative CAT-based personality measure}, year = {2009}, note = {{PDF File, 163 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {This paper describes development and initial criterion-related validation of the PreVisor Computer Adaptive Personality Scales (PCAPS), a computerized adaptive testing-based personality measure that uses an ideal point IRT model based on forced-choice, paired-comparison responses. Based on results from a large consortium study, a composite of six PCAPS scales identified as relevant to the population of interest (first-line supervisors) had an estimated operational validity against an overall job performance criterion of ρ = .25. Uncorrected and corrected criterion-related validity results for each of the six PCAPS scales making up the composite are also reported. 
Because the PCAPS algorithm computes intermediate scale scores until a stopping rule is triggered, we were able to graph number of statement-pairs presented against criterion-related validities. Results showed generally monotonically increasing functions. However, asymptotic validity levels, or at least a reduction in the rate of increase in slope, were often reached after 5-7 statement-pairs were presented. In the case of the composite measure, there was some evidence that validities decreased after about six statement-pairs. A possible explanation for this is provided.}, author = {Schneider, R. J. and McLellan, R. A. and Kantrowitz, T. M. and Houston, J. S. and Borman, W. C.} } @inbook {1954, title = {Developing item variants: An empirical study}, year = {2009}, note = {{PDF file, 194 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Large-scale standardized test have been widely used for educational and licensure testing. In computerized adaptive testing (CAT), one of the practical concerns for maintaining large-scale assessments is to ensure adequate numbers of high-quality items that are required for item pool functioning. Developing items at specific difficulty levels and for certain areas of test plans is a wellknown challenge. The purpose of this study was to investigate strategies for varying items that can effectively generate items at targeted difficulty levels and specific test plan areas. Each variant item generation model was developed by decomposing selected source items possessing ideal measurement properties and targeting the desirable content domains. 341 variant items were generated from 72 source items. Data were collected from six pretest periods. Items were calibrated using the Rasch model. Initial results indicate that variant items showed desirable measurement properties. Additionally, compared to an average of approximately 60\% of the items passing pretest criteria, an average of 84\% of the variant items passed the pretest criteria. }, author = {Wendt, A. and Kao, S. and Gorham, J. and Woo, A.} } @article {7, title = {Development and preliminary testing of a computerized adaptive assessment of chronic pain}, journal = {Journal of Pain}, volume = {10}, number = {9}, year = {2009}, note = {Anatchkova, Milena DSaris-Baglama, Renee NKosinski, MarkBjorner, Jakob B1R43AR052251-01A1/AR/NIAMS NIH HHS/United StatesEvaluation StudiesResearch Support, N.I.H., ExtramuralUnited StatesThe journal of pain : official journal of the American Pain SocietyJ Pain. 2009 Sep;10(9):932-43.}, month = {Sep}, pages = {932-943}, edition = {2009/07/15}, abstract = {The aim of this article is to report the development and preliminary testing of a prototype computerized adaptive test of chronic pain (CHRONIC PAIN-CAT) conducted in 2 stages: (1) evaluation of various item selection and stopping rules through real data-simulated administrations of CHRONIC PAIN-CAT; (2) a feasibility study of the actual prototype CHRONIC PAIN-CAT assessment system conducted in a pilot sample. Item calibrations developed from a US general population sample (N = 782) were used to program a pain severity and impact item bank (kappa = 45), and real data simulations were conducted to determine a CAT stopping rule. The CHRONIC PAIN-CAT was programmed on a tablet PC using QualityMetric{\textquoteright}s Dynamic Health Assessment (DYHNA) software and administered to a clinical sample of pain sufferers (n = 100). 
The CAT was completed in significantly less time than the static (full item bank) assessment (P < .001). On average, 5.6 items were dynamically administered by CAT to achieve a precise score. Scores estimated from the 2 assessments were highly correlated (r = .89), and both assessments discriminated across pain severity levels (P < .001, RV = .95). Patients{\textquoteright} evaluations of the CHRONIC PAIN-CAT were favorable. PERSPECTIVE: This report demonstrates that the CHRONIC PAIN-CAT is feasible for administration in a clinic. The application has the potential to improve pain assessment and help clinicians manage chronic pain.}, keywords = {*Computers, *Questionnaires, Activities of Daily Living, Adaptation, Psychological, Chronic Disease, Cohort Studies, Disability Evaluation, Female, Humans, Male, Middle Aged, Models, Psychological, Outcome Assessment (Health Care), Pain Measurement/*methods, Pain, Intractable/*diagnosis/psychology, Psychometrics, Quality of Life, User-Computer Interface}, isbn = {1528-8447 (Electronic)1526-5900 (Linking)}, author = {Anatchkova, M. D. and Saris-Baglama, R. N. and Kosinski, M. and Bjorner, J. B.} } @article {138, title = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using Rasch analysis}, journal = {Rehabilitation Psychology}, volume = {54}, number = {2}, year = {2009}, note = {Forkmann, ThomasBoecker, MarenNorra, ChristineEberle, NicoleKircher, TiloSchauerte, PatrickMischke, KarlWesthofen, MartinGauggel, SiegfriedWirtz, MarkusResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesRehabilitation psychologyRehabil Psychol. 2009 May;54(2):186-97.}, month = {May}, pages = {186-97}, edition = {2009/05/28}, abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. 
It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.}, keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role}, isbn = {0090-5550 (Print)0090-5550 (Linking)}, author = {Forkmann, T. and Boecker, M. and Norra, C. and Eberle, N. and Kircher, T. and Schauerte, P. and Mischke, K. and Westhofen, M. and Gauggel, S. and Wirtz, M.} } @article {131, title = {Evaluation of a computer-adaptive test for the assessment of depression (D-CAT) in clinical application}, journal = {International Journal for Methods in Psychiatric Research}, volume = {18}, number = {1}, year = {2009}, note = {Journal articleInternational journal of methods in psychiatric researchInt J Methods Psychiatr Res. 2009 Feb 4.}, month = {Feb 4}, pages = {233-236}, edition = {2009/02/06}, abstract = {In the past, a German Computerized Adaptive Test, based on Item Response Theory (IRT), was developed for purposes of assessing the construct depression [Computer-adaptive test for depression (D-CAT)]. This study aims at testing the feasibility and validity of the real computer-adaptive application.The D-CAT, supplied by a bank of 64 items, was administered on personal digital assistants (PDAs) to 423 consecutive patients suffering from psychosomatic and other medical conditions (78 with depression). Items were adaptively administered until a predetermined reliability (r >/= 0.90) was attained. For validation purposes, the Hospital Anxiety and Depression Scale (HADS), the Centre for Epidemiological Studies Depression (CES-D) scale, and the Beck Depression Inventory (BDI) were administered. Another sample of 114 patients was evaluated using standardized diagnostic interviews [Composite International Diagnostic Interview (CIDI)].The D-CAT was quickly completed (mean 74 seconds), well accepted by the patients and reliable after an average administration of only six items. In 95\% of the cases, 10 items or less were needed for a reliable score estimate. Correlations between the D-CAT and the HADS, CES-D, and BDI ranged between r = 0.68 and r = 0.77. The D-CAT distinguished between diagnostic groups as well as established questionnaires do.The D-CAT proved an efficient, well accepted and reliable tool. Discriminative power was comparable to other depression measures, whereby the CAT is shorter and more precise. Item usage raises questions of balancing the item selection for content in the future. Copyright (c) 2009 John Wiley \& Sons, Ltd.}, isbn = {1049-8931 (Print)}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Rose, M. and Bjorner, J. B. and Klapp, B. F.} } @article {227, title = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception}, journal = {Journal of Clinical Epidemiology}, volume = {62}, number = {3}, year = {2009}, note = {Kocalevent, Ruya-DanielaRose, MatthiasBecker, JanineWalter, Otto BFliege, HerbertBjorner, Jakob BKleiber, DieterKlapp, Burghard FEvaluation StudiesUnited StatesJournal of clinical epidemiologyJ Clin Epidemiol. 
2009 Mar;62(3):278-87, 287.e1-3. Epub 2008 Jul 18.}, pages = {278-287}, edition = {2008/07/22}, abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.}, keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome}, isbn = {1878-5921 (Electronic)0895-4356 (Linking)}, author = {Kocalevent, R. D. and Rose, M. and Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kleiber, D. and Klapp, B. F.} } @inbook {1798, title = {Features of J-CAT (Japanese Computerized Adaptive Test)}, year = {2009}, note = {{PDF File, 655KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Imai, S. and Ito, S. and Nakamura, Y. and Kikuchi, K. and Akagi, Y. and Nakasono, H. and Honda, A. and Hiramura, T.} } @inbook {1766, title = {Item selection and hypothesis testing for the adaptive measurement of change}, year = {2009}, note = {{PDF File, 228 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Finkelman, M. and Weiss, D. J. and Kim-Kang, G.} } @article {143, title = {Progress in assessing physical function in arthritis: PROMIS short forms and computerized adaptive testing}, journal = {Journal of Rheumatology}, volume = {36}, number = {9}, year = {2009}, note = {Fries, James FCella, DavidRose, MatthiasKrishnan, EswarBruce, BonnieU01 AR052158/AR/NIAMS NIH HHS/United StatesU01 AR52177/AR/NIAMS NIH HHS/United StatesConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 2009 Sep;36(9):2061-6.}, month = {Sep}, pages = {2061-2066}, edition = {2009/09/10}, abstract = {OBJECTIVE: Assessing self-reported physical function/disability with the Health Assessment Questionnaire Disability Index (HAQ) and other instruments has become central in arthritis research. 
Item response theory (IRT) and computerized adaptive testing (CAT) techniques can increase reliability and statistical power. IRT-based instruments can improve measurement precision substantially over a wider range of disease severity. These modern methods were applied and the magnitude of improvement was estimated. METHODS: A 199-item physical function/disability item bank was developed by distilling 1865 items to 124, including Legacy Health Assessment Questionnaire (HAQ) and Physical Function-10 items, and improving precision through qualitative and quantitative evaluation in over 21,000 subjects, which included about 1500 patients with rheumatoid arthritis and osteoarthritis. Four new instruments, (A) Patient-Reported Outcomes Measurement Information (PROMIS) HAQ, which evolved from the original (Legacy) HAQ; (B) "best" PROMIS 10; (C) 20-item static (short) forms; and (D) simulated PROMIS CAT, which sequentially selected the most informative item, were compared with the HAQ. RESULTS: Online and mailed administration modes yielded similar item and domain scores. The HAQ and PROMIS HAQ 20-item scales yielded greater information content versus other scales in patients with more severe disease. The "best" PROMIS 20-item scale outperformed the other 20-item static forms over a broad range of 4 standard deviations. The 10-item simulated PROMIS CAT outperformed all other forms. CONCLUSION: Improved items and instruments yielded better information. The PROMIS HAQ is currently available and considered validated. The new PROMIS short forms, after validation, are likely to represent further improvement. CAT-based physical function/disability assessment offers superior performance over static forms of equal length.}, keywords = {*Disability Evaluation, *Outcome Assessment (Health Care), Arthritis/diagnosis/*physiopathology, Health Surveys, Humans, Prognosis, Reproducibility of Results}, isbn = {0315-162X (Print)0315-162X (Linking)}, author = {Fries, J.F. and Cella, D. and Rose, M. and Krishnan, E. and Bruce, B.} } @article {590, title = {Adaptive measurement of individual change}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, note = {{PDF file, 568 KB}}, pages = {49-58}, doi = {10.1027/0044-3409.216.1.49}, author = {Kim-Kang, G. and Weiss, D. J.} } @article {2181, title = {Computer-Based and Paper-and-Pencil Administration Mode Effects on a Statewide End-of-Course English Test}, journal = {Educational and Psychological Measurement}, volume = {68}, number = {4}, year = {2008}, pages = {554-570}, abstract = {

The current study compared student performance between paper-and-pencil testing (PPT) and computer-based testing (CBT) on a large-scale statewide end-of-course English examination. Analyses were conducted at both the item and test levels. The overall results suggest that scores obtained from PPT and CBT were comparable. However, at the content domain level, a rather large difference in the reading comprehension section suggests that the reading comprehension test may be more affected by the test administration mode. Results from the confirmatory factor analysis suggest that the administration mode did not alter the construct of the test.

}, doi = {10.1177/0013164407310132}, url = {http://epm.sagepub.com/content/68/4/554.abstract}, author = {Kim, Do-Hong and Huynh, Huynh} } @article {231, title = {Computerized adaptive testing in back pain: Validation of the CAT-5D-QOL}, journal = {Spine}, volume = {33}, number = {12}, year = {2008}, note = {Kopec, Jacek ABadii, MaziarMcKenna, MarioLima, Viviane DSayre, Eric CDvorak, MarcelResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesUnited StatesSpineSpine (Phila Pa 1976). 2008 May 20;33(12):1384-90.}, month = {May 20}, pages = {1384-90}, edition = {2008/05/23}, abstract = {STUDY DESIGN: We have conducted an outcome instrument validation study. OBJECTIVE: Our objective was to develop a computerized adaptive test (CAT) to measure 5 domains of health-related quality of life (HRQL) and assess its feasibility, reliability, validity, and efficiency. SUMMARY OF BACKGROUND DATA: Kopec and colleagues have recently developed item response theory based item banks for 5 domains of HRQL relevant to back pain and suitable for CAT applications. The domains are Daily Activities (DAILY), Walking (WALK), Handling Objects (HAND), Pain or Discomfort (PAIN), and Feelings (FEEL). METHODS: An adaptive algorithm was implemented in a web-based questionnaire administration system. The questionnaire included CAT-5D-QOL (5 scales), Modified Oswestry Disability Index (MODI), Roland-Morris Disability Questionnaire (RMDQ), SF-36 Health Survey, and standard clinical and demographic information. Participants were outpatients treated for mechanical back pain at a referral center in Vancouver, Canada. RESULTS: A total of 215 patients completed the questionnaire and 84 completed a retest. On average, patients answered 5.2 items per CAT-5D-QOL scale. Reliability ranged from 0.83 (FEEL) to 0.92 (PAIN) and was 0.92 for the MODI, RMDQ, and Physical Component Summary (PCS-36). The ceiling effect was 0.5\% for PAIN compared with 2\% for MODI and 5\% for RMQ. The CAT-5D-QOL scales correlated as anticipated with other measures of HRQL and discriminated well according to the level of satisfaction with current symptoms, duration of the last episode, sciatica, and disability compensation. The average relative discrimination index was 0.87 for PAIN, 0.67 for DAILY and 0.62 for WALK, compared with 0.89 for MODI, 0.80 for RMDQ, and 0.59 for PCS-36. CONCLUSION: The CAT-5D-QOL is feasible, reliable, valid, and efficient in patients with back pain. This methodology can be recommended for use in back pain research and should improve outcome assessment, facilitate comparisons across studies, and reduce patient burden.}, keywords = {*Disability Evaluation, *Health Status Indicators, *Quality of Life, Adult, Aged, Algorithms, Back Pain/*diagnosis/psychology, British Columbia, Diagnosis, Computer-Assisted/*standards, Feasibility Studies, Female, Humans, Internet, Male, Middle Aged, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results}, isbn = {1528-1159 (Electronic)0362-2436 (Linking)}, author = {Kopec, J. A. and Badii, M. and McKenna, M. and Lima, V. D. and Sayre, E. C. and Dvorak, M.} } @conference {101, title = {Developing a progressive approach to using the GAIN in order to reduce the duration and cost of assessment with the GAIN short screener, Quick and computer adaptive testing}, booktitle = {Joint Meeting on Adolescent Treatment Effectiveness }, year = {2008}, note = {ProCite field[6]: Paper presented at the}, month = {2008}, address = {Washington D.C., USA}, author = {Dennis, M. L. 
and Funk, R. and Titus, J. and Riley, B. B. and Hosman, S. and Kinne, S.} } @article {20, title = {Functioning and validity of a computerized adaptive test to measure anxiety (A CAT)}, journal = {Depression and Anxiety}, volume = {25}, number = {12}, year = {2008}, pages = {E182-E194}, abstract = {Background: The aim of this study was to evaluate the Computerized Adaptive Test to measure anxiety (A-CAT), a patient-reported outcome questionnaire that uses computerized adaptive testing to measure anxiety. Methods: The A-CAT builds on an item bank of 50 items that has been built using conventional item analyses and item response theory analyses. The A-CAT was administered on Personal Digital Assistants to n=357 patients diagnosed and treated at the department of Psychosomatic Medicine and Psychotherapy, Charit{\'e} Berlin, Germany. For validation purposes, two subgroups of patients (n=110 and 125) answered the A-CAT along with established anxiety and depression questionnaires. Results: The A-CAT was fast to complete (on average in 2 min, 38 s) and a precise item response theory based CAT score (reliability>.9) could be estimated after 4{\textendash}41 items. On average, the CAT displayed 6 items (SD=4.2). Convergent validity of the A-CAT was supported by correlations to existing tools (Hospital Anxiety and Depression Scale-A, Beck Anxiety Inventory, Berliner Stimmungs-Fragebogen A/D, and State Trait Anxiety Inventory: r=.56{\textendash}.66); discriminant validity between diagnostic groups was higher for the A-CAT than for other anxiety measures. Conclusions: The German A-CAT is an efficient, reliable, and valid tool for assessing anxiety in patients suffering from anxiety disorders and other conditions with significant potential for initial assessment and long-term treatment monitoring. Future research directions are to explore content balancing of the item selection algorithm of the CAT, to norm the tool to a healthy sample, and to develop practical cutoff scores. Depression and Anxiety, 2008. {\textcopyright} 2008 Wiley-Liss, Inc.}, isbn = {1520-6394}, author = {Becker, J. and Fliege, H. and Kocalevent, R. D. and Bjorner, J. B. and Rose, M. and Walter, O. B. and Klapp, B. F.} } @article {225, title = {ICAT: An adaptive testing procedure for the identification of idiosyncratic knowledge patterns}, journal = {Zeitschrift f{\"u}r Psychologie}, volume = {216}, number = {1}, year = {2008}, pages = {40-48}, abstract = {

Traditional adaptive tests provide an efficient method for estimating student achievement levels, by adjusting the characteristics of the test questions to match the performance of each student. These traditional adaptive tests are not designed to identify idiosyncratic knowledge patterns. As students move through their education, they learn content in any number of different ways related to their learning style and cognitive development. This may result in a student having different achievement levels from one content area to another within a domain of content. This study investigates whether such idiosyncratic knowledge patterns exist. It discusses the differences between idiosyncratic knowledge patterns and multidimensionality. Finally, it proposes an adaptive testing procedure that can be used to identify a student{\textquoteright}s areas of strength and weakness more efficiently than current adaptive testing approaches. The findings of the study indicate that a fairly large number of students may have test results that are influenced by their idiosyncratic knowledge patterns. The findings suggest that these patterns persist across time for a large number of students, and that the differences in student performance between content areas within a subject domain are large enough to allow them to be useful in instruction. Given the existence of idiosyncratic patterns of knowledge, the proposed testing procedure may enable us to provide more useful information to teachers. It should also allow us to differentiate between idiosyncratic patterns of knowledge and important multidimensionality in the testing data.

}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {594, title = {ICAT: An adaptive testing procedure for the identification of idiosyncratic knowledge patterns}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216(1)}, year = {2008}, pages = {40{\textendash}48}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {117, title = {An initial application of computerized adaptive testing (CAT) for measuring disability in patients with low back pain}, journal = {BMC Musculoskelet Disorders}, volume = {9}, number = {1}, year = {2008}, note = {Journal articleBMC musculoskeletal disordersBMC Musculoskelet Disord. 2008 Dec 18;9(1):166.}, month = {Dec 18}, pages = {166}, edition = {2008/12/20}, abstract = {ABSTRACT: BACKGROUND: Recent approaches to outcome measurement involving Computerized Adaptive Testing (CAT) offer an approach for measuring disability in low back pain (LBP) in a way that can reduce the burden upon patient and professional. The aim of this study was to explore the potential of CAT in LBP for measuring disability as defined in the International Classification of Functioning, Disability and Health (ICF) which includes impairments, activity limitation, and participation restriction. METHODS: 266 patients with low back pain answered questions from a range of widely used questionnaires. An exploratory factor analysis (EFA) was used to identify disability dimensions which were then subjected to Rasch analysis. Reliability was tested by internal consistency and person separation index (PSI). Discriminant validity of disability levels were evaluated by Spearman correlation coefficient (r), intraclass correlation coefficient [ICC(2,1)] and the Bland-Altman approach. A CAT was developed for each dimension, and the results checked against simulated and real applications from a further 133 patients. RESULTS: Factor analytic techniques identified two dimensions named "body functions" and "activity-participation". After deletion of some items for failure to fit the Rasch model, the remaining items were mostly free of Differential Item Functioning (DIF) for age and gender. Reliability exceeded 0.90 for both dimensions. The disability levels generated using all items and those obtained from the real CAT application were highly correlated (i.e. >0.97 for both dimensions). On average, 19 and 14 items were needed to estimate the precise disability levels using the initial CAT for the first and second dimension. However, a marginal increase in the standard error of the estimate across successive iterations substantially reduced the number of items required to make an estimate. CONCLUSIONS: Using a combination approach of EFA and Rasch analysis this study has shown that it is possible to calibrate items onto a single metric in a way that can be used to provide the basis of a CAT application. Thus there is an opportunity to obtain a wide variety of information to evaluate the biopsychosocial model in its more complex forms, without necessarily increasing the burden of information collection for patients.}, isbn = {1471-2474 (Electronic)}, author = {Elhan, A. H. and Oztuna, D. and Kutlay, S. and Kucukdeveci, A. A. 
and Tennant, A.} } @article {152, title = {Using computerized adaptive testing to reduce the burden of mental health assessment}, journal = {Psychiatric Services}, volume = {59}, number = {4}, year = {2008}, note = {Gibbons, Robert DWeiss, David JKupfer, David JFrank, EllenFagiolini, AndreaGrochocinski, Victoria JBhaumik, Dulal KStover, AngelaBock, R DarrellImmekus, Jason CR01-MH-30915/MH/United States NIMHR01-MH-66302/MH/United States NIMHResearch Support, N.I.H., ExtramuralUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 2008 Apr;59(4):361-8.}, month = {Apr}, pages = {361-8}, edition = {2008/04/02}, abstract = {OBJECTIVE: This study investigated the combination of item response theory and computerized adaptive testing (CAT) for psychiatric measurement as a means of reducing the burden of research and clinical assessments. METHODS: Data were from 800 participants in outpatient treatment for a mood or anxiety disorder; they completed 616 items of the 626-item Mood and Anxiety Spectrum Scales (MASS) at two times. The first administration was used to design and evaluate a CAT version of the MASS by using post hoc simulation. The second confirmed the functioning of CAT in live testing. RESULTS: Tests of competing models based on item response theory supported the scale{\textquoteright}s bifactor structure, consisting of a primary dimension and four group factors (mood, panic-agoraphobia, obsessive-compulsive, and social phobia). Both simulated and live CAT showed a 95\% average reduction (585 items) in items administered (24 and 30 items, respectively) compared with administration of the full MASS. The correlation between scores on the full MASS and the CAT version was .93. For the mood disorder subscale, differences in scores between two groups of depressed patients--one with bipolar disorder and one without--on the full scale and on the CAT showed effect sizes of .63 (p<.003) and 1.19 (p<.001) standard deviation units, respectively, indicating better discriminant validity for CAT. CONCLUSIONS: Instead of using small fixed-length tests, clinicians can create item banks with a large item pool, and a small set of the items most relevant for a given individual can be administered with no loss of information, yielding a dramatic reduction in administration time and patient and clinician burden.}, keywords = {*Diagnosis, Computer-Assisted, *Questionnaires, Adolescent, Adult, Aged, Agoraphobia/diagnosis, Anxiety Disorders/diagnosis, Bipolar Disorder/diagnosis, Female, Humans, Male, Mental Disorders/*diagnosis, Middle Aged, Mood Disorders/diagnosis, Obsessive-Compulsive Disorder/diagnosis, Panic Disorder/diagnosis, Phobic Disorders/diagnosis, Reproducibility of Results, Time Factors}, isbn = {1075-2730 (Print)}, author = {Gibbons, R. D. and Weiss, D. J. and Kupfer, D. J. and Frank, E. and Fagiolini, A. and Grochocinski, V. J. and Bhaumik, D. K. and Stover, A. and Bock, R. D. and Immekus, J. C.} } @inbook {1806, title = {Comparison of computerized adaptive testing and classical methods for measuring individual change}, year = {2007}, note = {{PDF file, 347 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Kim-Kang, G. and Weiss, D. J.} } @article {782, title = {Development and evaluation of a computer adaptive test for {\textquotedblleft}Anxiety{\textquotedblright} (Anxiety-CAT)}, journal = {Quality of Life Research}, volume = {16}, year = {2007}, pages = {143-155}, author = {Walter, O. B. and Becker, J. 
and Bjorner, J. B. and Fliege, H. and Klapp, B. F. and Rose, M.} } @inbook {1810, title = {ICAT: An adaptive testing procedure to allow the identification of idiosyncratic knowledge patterns}, year = {2007}, note = {{PDF file, 161 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {233, title = {An evaluation of a patient-reported outcomes found computerized adaptive testing was efficient in assessing osteoarthritis impact}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {7}, year = {2006}, pages = {715-723}, abstract = {BACKGROUND AND OBJECTIVES: Evaluate a patient-reported outcomes questionnaire that uses computerized adaptive testing (CAT) to measure the impact of osteoarthritis (OA) on functioning and well-being. MATERIALS AND METHODS: OA patients completed 37 questions about the impact of OA on physical, social and role functioning, emotional well-being, and vitality. Questionnaire responses were calibrated and scored using item response theory, and two scores were estimated: a Total-OA score based on patients{\textquoteright} responses to all 37 questions, and a simulated CAT-OA score where the computer selected and scored the five most informative questions for each patient. Agreement between Total-OA and CAT-OA scores was assessed using correlations. Discriminant validity of Total-OA and CAT-OA scores was assessed with analysis of variance. Criterion measures included OA pain and severity, patient global assessment, and missed work days. RESULTS: Simulated CAT-OA and Total-OA scores correlated highly (r = 0.96). Both Total-OA and simulated CAT-OA scores discriminated significantly between patients differing on the criterion measures. F-statistics across criterion measures ranged from 39.0 (P < .001) to 225.1 (P < .001) for the Total-OA score, and from 40.5 (P < .001) to 221.5 (P < .001) for the simulated CAT-OA score. CONCLUSIONS: CAT methods produce valid and precise estimates of the impact of OA on functioning and well-being with significant reduction in response burden.}, isbn = {08954356}, author = {Kosinski, M. and Bjorner, J. and Warejr, J. and Sullivan, E. and Straus, W.} } @article {2197, title = {Constructing a Computerized Adaptive Test for University Applicants With Disabilities}, journal = {Applied Measurement in Education}, volume = {18}, number = {4}, year = {2005}, pages = {381-405}, doi = {10.1207/s15324818ame1804_3}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1804_3}, author = {Moshinsky, Avital and Kazin, Cathrael} } @article {546, title = {Development of a computer-adaptive test for depression (D-CAT)}, journal = {Quality of Life Research}, volume = {14}, year = {2005}, pages = {2277{\textendash}2291}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Bjorner, J. B. and Klapp, B. F. 
and Rose, M.} } @article {219, title = {Toward efficient and comprehensive measurement of the alcohol problems continuum in college students: The Brief Young Adult Alcohol Consequences Questionnaire}, journal = {Alcoholism: Clinical \& Experimental Research}, volume = {29}, number = {7}, year = {2005}, pages = {1180-1189}, abstract = {Background: Although a number of measures of alcohol problems in college students have been studied, the psychometric development and validation of these scales have been limited, for the most part, to methods based on classical test theory. In this study, we conducted analyses based on item response theory to select a set of items for measuring the alcohol problem severity continuum in college students that balances comprehensiveness and efficiency and is free from significant gender bias. Method: We conducted Rasch model analyses of responses to the 48-item Young Adult Alcohol Consequences Questionnaire by 164 male and 176 female college students who drank on at least a weekly basis. An iterative process using item fit statistics, item severities, item discrimination parameters, model residuals, and analysis of differential item functioning by gender was used to pare the items down to those that best fit a Rasch model and that were most efficient in discriminating among levels of alcohol problems in the sample. Results: The process of iterative Rasch model analyses resulted in a final 24-item scale with the data fitting the unidimensional Rasch model very well. The scale showed excellent distributional properties, had items adequately matched to the severity of alcohol problems in the sample, covered a full range of problem severity, and appeared highly efficient in retaining all of the meaningful variance captured by the original set of 48 items. Conclusions: The use of Rasch model analyses to inform item selection produced a final scale that, in both its comprehensiveness and its efficiency, should be a useful tool for researchers studying alcohol problems in college students. To aid interpretation of raw scores, examples of the types of alcohol problems that are likely to be experienced across a range of selected scores are provided.}, keywords = {Psychometrics, Substance-Related Disorders}, author = {Kahler, C. W. and Strong, D. R. and Read, J. P.} } @article {281, title = {The use of person-fit statistics in computerized adaptive testing}, year = {2005}, month = {September, 2005}, institution = {Law School Admission Council}, address = {Newton, PA. USA}, isbn = {Computerized Testing Report 97-14}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @conference {977, title = {Computer adaptive testing and the No Child Left Behind Act}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2004}, note = {{PDF file, 117 KB}}, address = {San Diego CA}, author = {Kingsbury, G. G. 
and Hauser, C.} } @article {147, title = {Computerized adaptive measurement of depression: A simulation study}, journal = {BMC Psychiatry}, volume = {4}, number = {1}, year = {2004}, pages = {13-23}, abstract = {Background: Efficient, accurate instruments for measuring depression are increasingly importantin clinical practice. We developed a computerized adaptive version of the Beck DepressionInventory (BDI). We examined its efficiency and its usefulness in identifying Major DepressiveEpisodes (MDE) and in measuring depression severity.Methods: Subjects were 744 participants in research studies in which each subject completed boththe BDI and the SCID. In addition, 285 patients completed the Hamilton Depression Rating Scale.Results: The adaptive BDI had an AUC as an indicator of a SCID diagnosis of MDE of 88\%,equivalent to the full BDI. The adaptive BDI asked fewer questions than the full BDI (5.6 versus 21items). The adaptive latent depression score correlated r = .92 with the BDI total score and thelatent depression score correlated more highly with the Hamilton (r = .74) than the BDI total scoredid (r = .70).Conclusions: Adaptive testing for depression may provide greatly increased efficiency withoutloss of accuracy in identifying MDE or in measuring depression severity.}, keywords = {*Computer Simulation, Adult, Algorithms, Area Under Curve, Comparative Study, Depressive Disorder/*diagnosis/epidemiology/psychology, Diagnosis, Computer-Assisted/*methods/statistics \& numerical data, Factor Analysis, Statistical, Female, Humans, Internet, Male, Mass Screening/methods, Patient Selection, Personality Inventory/*statistics \& numerical data, Pilot Projects, Prevalence, Psychiatric Status Rating Scales/*statistics \& numerical data, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Severity of Illness Index, Software}, author = {Gardner, W. and Shear, K. and Kelleher, K. J. and Pajer, K. A. and Mammen, O. and Buysse, D. and Frank, E.} } @inbook {1737, title = {Computerized adaptive testing and item banking}, year = {2004}, note = {{PDF file 371 KB}}, address = {P. M. Fayers and R. D. Hays (Eds.) Assessing Quality of Life. Oxford: Oxford University Press.}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E and Jr.} } @article {2114, title = {Computerized Adaptive Testing With Multiple-Form Structures}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {147-164}, abstract = {

A multiple-form structure (MFS) is an ordered collection or network of testlets (i.e., sets of items). An examinee{\textquoteright}s progression through the network of testlets is dictated by the correctness of an examinee{\textquoteright}s answers, thereby adapting the test to his or her trait level. The collection of paths through the network yields the set of all possible test forms, allowing test specialists the opportunity to review them before they are administered. Also, limiting the exposure of an individual MFS to a specific period of time can enhance test security. This article provides an overview of methods that have been developed to generate parallel MFSs. The approach is applied to the assembly of an experimental computerized Law School Admission Test (LSAT).

}, doi = {10.1177/0146621604263652}, url = {http://apm.sagepub.com/content/28/3/147.abstract}, author = {Armstrong, Ronald D. and Jones, Douglas H. and Koppel, Nicole B. and Pashley, Peter J.} } @article {11, title = {Computerized adaptive testing with multiple-form structures}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {147-164}, publisher = {Sage Publications: US}, abstract = {A multiple-form structure (MFS) is an ordered collection or network of testlets (i.e., sets of items). An examinee{\textquoteright}s progression through the network of testlets is dictated by the correctness of an examinee{\textquoteright}s answers, thereby adapting the test to his or her trait level. The collection of paths through the network yields the set of all possible test forms, allowing test specialists the opportunity to review them before they are administered. Also, limiting the exposure of an individual MFS to a specific period of time can enhance test security. This article provides an overview of methods that have been developed to generate parallel MFSs. The approach is applied to the assembly of an experimental computerized Law School Admission Test (LSAT). (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, Law School Admission Test, multiple-form structure, testlets}, isbn = {0146-6216 (Print)}, author = {Armstrong, R. D. and Jones, D. H. and Koppel, N. B. and Pashley, P. J.} } @article {167, title = {Score comparability of short forms and computerized adaptive testing: Simulation study with the activity measure for post-acute care}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {85}, number = {4}, year = {2004}, note = {Haley, Stephen MCoster, Wendy JAndres, Patricia LKosinski, MarkNi, PengshengR01 hd43568/hd/nichdComparative StudyMulticenter StudyResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2004 Apr;85(4):661-6.}, month = {Apr}, pages = {661-6}, edition = {2004/04/15}, abstract = {OBJECTIVE: To compare simulated short-form and computerized adaptive testing (CAT) scores to scores obtained from complete item sets for each of the 3 domains of the Activity Measure for Post-Acute Care (AM-PAC). DESIGN: Prospective study. SETTING: Six postacute health care networks in the greater Boston metropolitan area, including inpatient acute rehabilitation, transitional care units, home care, and outpatient services. PARTICIPANTS: A convenience sample of 485 adult volunteers who were receiving skilled rehabilitation services. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Inpatient and community-based short forms and CAT applications were developed for each of 3 activity domains (physical \& mobility, personal care \& instrumental, applied cognition) using item pools constructed from new items and items from existing postacute care instruments. RESULTS: Simulated CAT scores correlated highly with score estimates from the total item pool in each domain (4- and 6-item CAT r range,.90-.95; 10-item CAT r range,.96-.98). Scores on the 10-item short forms constructed for inpatient and community settings also provided good estimates of the AM-PAC item pool scores for the physical \& movement and personal care \& instrumental domains, but were less consistent in the applied cognition domain. 
Confidence intervals around individual scores were greater in the short forms than for the CATs. CONCLUSIONS: Accurate scoring estimates for AM-PAC domains can be obtained with either the setting-specific short forms or the CATs. The strong relationship between CAT and item pool scores can be attributed to the CAT{\textquoteright}s ability to select specific items to match individual responses. The CAT may have additional advantages over short forms in practicality, efficiency, and the potential for providing more precise scoring estimates for individuals.}, keywords = {Boston, Factor Analysis, Statistical, Humans, Outcome Assessment (Health Care)/*methods, Prospective Studies, Questionnaires/standards, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Coster, W. J. and Andres, P. L. and Kosinski, M. and Ni, P.} } @article {2084, title = {Validating the German computerized adaptive test for anxiety on healthy sample (A-CAT)}, journal = {Quality of Life Research}, volume = {13}, year = {2004}, pages = {1515}, author = {Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kocalevent, R. D. and Schmid, G. and Klapp, B. F. and Rose, M.} } @conference {989, title = {Calibrating CAT pools and online pretest items using nonparametric and adjusted marginal maximum likelihood methods}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 128 K}, address = {Chicago IL}, author = {Krass, I. A. and Williams, B.} } @article {30, title = {Calibration of an item pool for assessing the burden of headaches: an application of item response theory to the Headache Impact Test (HIT)}, journal = {Quality of Life Research}, volume = {12}, number = {8}, year = {2003}, note = {0962-9343Journal Article}, pages = {913-933}, abstract = {BACKGROUND: Measurement of headache impact is important in clinical trials, case detection, and the clinical monitoring of patients. Computerized adaptive testing (CAT) of headache impact has potential advantages over traditional fixed-length tests in terms of precision, relevance, real-time quality control and flexibility. OBJECTIVE: To develop an item pool that can be used for a computerized adaptive test of headache impact. METHODS: We analyzed responses to four well-known tests of headache impact from a population-based sample of recent headache sufferers (n = 1016). We used confirmatory factor analysis for categorical data and analyses based on item response theory (IRT). RESULTS: In factor analyses, we found very high correlations between the factors hypothesized by the original test constructers, both within and between the original questionnaires. These results suggest that a single score of headache impact is sufficient. We established a pool of 47 items which fitted the generalized partial credit IRT model. By simulating a computerized adaptive health test we showed that an adaptive test of only five items had a very high concordance with the score based on all items and that different worst-case item selection scenarios did not lead to bias. 
CONCLUSION: We have established a headache impact item pool that can be used in CAT of headache impact.}, keywords = {*Cost of Illness, *Decision Support Techniques, *Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Disability Evaluation, Factor Analysis, Statistical, Headache/*psychology, Health Surveys, Human, Longitudinal Studies, Middle Aged, Migraine/psychology, Models, Psychological, Psychometrics/*methods, Quality of Life/*psychology, Software, Support, Non-U.S. Gov{\textquoteright}t}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E., Jr.} } @conference {860, title = {A comparison of item exposure control procedures using a CAT system based on the generalized partial credit model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 265 KB}}, address = {Chicago IL}, author = {Burt, W. M and Kim, S.-J and Davis, L. L. and Dodd, B. G.} } @article {31, title = {The feasibility of applying item response theory to measures of migraine impact: a re-analysis of three clinical studies}, journal = {Quality of Life Research}, volume = {12}, number = {8}, year = {2003}, note = {0962-9343Journal Article}, pages = {887-902}, abstract = {BACKGROUND: Item response theory (IRT) is a powerful framework for analyzing multiitem scales and is central to the implementation of computerized adaptive testing. OBJECTIVES: To explain the use of IRT to examine measurement properties and to apply IRT to a questionnaire for measuring migraine impact--the Migraine Specific Questionnaire (MSQ). METHODS: Data from three clinical studies that employed the MSQ-version 1 were analyzed by confirmatory factor analysis for categorical data and by IRT modeling. RESULTS: Confirmatory factor analyses showed very high correlations between the factors hypothesized by the original test constructions. Further, high item loadings on one common factor suggest that migraine impact may be adequately assessed by only one score. IRT analyses of the MSQ were feasible and provided several suggestions as to how to improve the items and in particular the response choices. Out of 15 items, 13 showed adequate fit to the IRT model. In general, IRT scores were strongly associated with the scores proposed by the original test developers and with the total item sum score. Analysis of response consistency showed that more than 90\% of the patients answered consistently according to a unidimensional IRT model. For the remaining patients, scores on the dimension of emotional function were less strongly related to the overall IRT scores that mainly reflected role limitations. Such response patterns can be detected easily using response consistency indices. Analysis of test precision across score levels revealed that the MSQ was most precise at one standard deviation worse than the mean impact level for migraine patients that are not in treatment. Thus, gains in test precision can be achieved by developing items aimed at less severe levels of migraine impact. CONCLUSIONS: IRT proved useful for analyzing the MSQ. 
The approach warrants further testing in a more comprehensive item pool for headache impact that would enable computerized adaptive testing.}, keywords = {*Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Cost of Illness, Factor Analysis, Statistical, Feasibility Studies, Female, Human, Male, Middle Aged, Migraine/*psychology, Models, Psychological, Psychometrics/instrumentation/*methods, Quality of Life/*psychology, Questionnaires, Support, Non-U.S. Gov{\textquoteright}t}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E., Jr.} } @inbook {414, title = {Item selection in polytomous CAT}, booktitle = {New developments in psychometrics}, year = {2003}, pages = {207{\textendash}214}, publisher = {Psychometric Society, Springer}, organization = {Psychometric Society, Springer}, address = {Tokyo, Japan}, keywords = {computerized adaptive testing}, author = {Veldkamp, B. P.}, editor = {A. Okada and K. Shigenasu and Y. Kano and J. Meulman} } @conference {943, title = {Recalibration of IRT item parameters in CAT: Sparse data matrices and missing data treatments}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {(PDF file, 626 K}, address = {Chicago IL}, author = {Harmes, J. C. and Parshall, C. G. and Kromrey, J. D.} } @article {407, title = {Using response times to detect aberrant responses in computerized adaptive testing}, journal = {Psychometrika}, volume = {68}, number = {2}, year = {2003}, pages = {251-265}, abstract = {A lognormal model for response times is used to check response times for aberrances in examinee behavior on computerized adaptive tests. Both classical procedures and Bayesian posterior predictive checks are presented. For a fixed examinee, responses and response times are independent; checks based on response times offer thus information independent of the results of checks on response patterns. Empirical examples of the use of classical and Bayesian checks for detecting two different types of aberrances in response times are presented. The detection rates for the Bayesian checks outperformed those for the classical checks, but at the cost of higher false-alarm rates. A guideline for the choice between the two types of checks is offered.}, keywords = {Adaptive Testing, Behavior, Computer Assisted Testing, computerized adaptive testing, Models, person Fit, Prediction, Reaction Time}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @article {411, title = {Detection of person misfit in computerized adaptive tests with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {164-180}, abstract = {Item scores that do not fit an assumed item response theory model may cause the latent trait value to be inaccurately estimated. For a computerized adaptive test (CAT) using dichotomous items, several person-fit statistics for detecting mis.tting item score patterns have been proposed. Both for paper-and-pencil (P\&P) tests and CATs, detection ofperson mis.t with polytomous items is hardly explored. In this study, the nominal and empirical null distributions ofthe standardized log-likelihood statistic for polytomous items are compared both for P\&P tests and CATs. Results showed that the empirical distribution of this statistic differed from the assumed standard normal distribution for both P\&P tests and CATs. 
Second, a new person-fit statistic based on the cumulative sum (CUSUM) procedure from statistical process control was proposed. By means of simulated data, critical values were determined that can be used to classify a pattern as fitting or misfitting. The effectiveness of the CUSUM to detect simulees with item preknowledge was investigated. Detection rates using the CUSUM were high for realistic numbers of disclosed items.}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {223, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans, LA. USA}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G.} } @conference {972, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 134 KB}}, address = {New Orleans LA}, author = {Kingsbury, G. G.} } @inbook {119, title = {Generating abstract reasoning items with cognitive theory}, booktitle = {Item generation for test development}, year = {2002}, note = {Using Smart Source Parsing. Item generation for test development (pp. 219-250). Mahwah, NJ: Lawrence Erlbaum Associates, Publishers. xxxii, 412 pp}, pages = {219-250}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Developed and evaluated a generative system for abstract reasoning items based on cognitive theory. The cognitive design system approach was applied to generate matrix completion problems. Study 1 involved developing the cognitive theory with 191 college students who were administered Set I and Set II of the Advanced Progressive Matrices. Study 2 examined item generation by cognitive theory. Study 3 explored the psychometric properties and construct representation of abstract reasoning test items with 728 young adults. Five structurally equivalent forms of Abstract Reasoning Test (ART) items were prepared from the generated item bank and administered to the Ss. In Study 4, the nomothetic span of construct validity of the generated items was examined with 728 young adults who were administered ART items, and 217 young adults who were administered ART items and the Advanced Progressive Matrices. Results indicate the matrix completion items were effectively generated by the cognitive design system approach. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Cognitive Processes, Measurement, Reasoning, Test Construction, Test Items, Test Validity, Theories}, author = {Embretson, S. E.}, editor = {P. Kyllonen} } @article {146, title = {Multidimensional adaptive testing for mental health problems in primary care}, journal = {Medical Care}, volume = {40}, number = {9}, year = {2002}, note = {Gardner, William; Kelleher, Kelly J; Pajer, Kathleen A. MCJ-177022/PHS HHS/MH30915/MH/NIMH NIH HHS/MH50629/MH/NIMH NIH HHS/Med Care. 2002 Sep;40(9):812-23.}, month = {Sep}, pages = {812-23}, edition = {2002/09/10}, abstract = {OBJECTIVES: Efficient and accurate instruments for assessing child psychopathology are increasingly important in clinical practice and research. 
For example, screening in primary care settings can identify children and adolescents with disorders that may otherwise go undetected. However, primary care offices are notorious for the brevity of visits and screening must not burden patients or staff with long questionnaires. One solution is to shorten assessment instruments, but dropping questions typically makes an instrument less accurate. An alternative is adaptive testing, in which a computer selects the items to be asked of a patient based on the patient{\textquoteright}s previous responses. This research used a simulation to test a child mental health screen based on this technology. RESEARCH DESIGN: Using half of a large sample of data, a computerized version was developed of the Pediatric Symptom Checklist (PSC), a parental-report psychosocial problem screen. With the unused data, a simulation was conducted to determine whether the Adaptive PSC can reproduce the results of the full PSC with greater efficiency. SUBJECTS: PSCs were completed by parents on 21,150 children seen in a national sample of primary care practices. RESULTS: Four latent psychosocial problem dimensions were identified through factor analysis: internalizing problems, externalizing problems, attention problems, and school problems. A simulated adaptive test measuring these traits asked an average of 11.6 questions per patient, and asked five or fewer questions for 49\% of the sample. There was high agreement between the adaptive test and the full (35-item) PSC: only 1.3\% of screening decisions were discordant (kappa = 0.93). This agreement was higher than that obtained using a comparable length (12-item) short-form PSC (3.2\% of decisions discordant; kappa = 0.84). CONCLUSIONS: Multidimensional adaptive testing may be an accurate and efficient technology for screening for mental health problems in primary care settings.}, keywords = {Adolescent, Child, Child Behavior Disorders/*diagnosis, Child Health Services/*organization \& administration, Factor Analysis, Statistical, Female, Humans, Linear Models, Male, Mass Screening/*methods, Parents, Primary Health Care/*organization \& administration}, isbn = {0025-7079 (Print)0025-7079 (Linking)}, author = {Gardner, W. and Kelleher, K. J. and Pajer, K. A.} } @conference {987, title = {Application of score information for CAT pool development and its connection with "likelihood test information}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {$\#${PDF file, 392 KB}}, address = {Seattle WA}, author = {Krass, I. A.} } @article {295, title = {Concerns with computerized adaptive oral proficiency assessment. A commentary on "Comparing examinee attitudes Toward computer-assisted and other oral proficient assessments": Response to the Norris Commentary}, journal = {Language Learning and Technology}, volume = {5}, number = {2}, year = {2001}, pages = {95-108}, abstract = {Responds to an article on computerized adaptive second language (L2) testing, expressing concerns about the appropriateness of such tests for informing language educators about the language skills of L2 learners and users and fulfilling the intended purposes and achieving the desired consequences of language test use.The authors of the original article respond. (Author/VWL)}, author = {Norris, J. M. and Kenyon, D. M. 
and Malabonga, V.} } @article {604, title = {CUSUM-based person-fit statistics for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, year = {2001}, pages = {199-218}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {1084, title = {Nearest neighbors, simple strata, and probabilistic parameters: An empirical comparison of methods for item exposure control in CATs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Parshall, C. G. and Kromrey, J. D. and Harmes, J. C. and Sentovich, C.} } @booklet {1402, title = {Online item parameter recalibration: Application of missing data treatments to overcome the effects of sparse data conditions in a computerized adaptive version of the MCAT}, year = {2001}, note = {{PDF file, 406 KB}}, address = {Unpublished manuscript}, author = {Harmes, J. C. and Kromrey, J. D. and Parshall, C. G.} } @conference {1212, title = {Using response times to detect aberrant behavior in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @conference {986, title = {Change in distribution of latent ability with item position in CAT sequence}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education in New Orleans}, year = {2000}, note = {{PDF file, 103 KB}}, address = {LA}, author = {Krass, I. A.} } @inbook {255, title = {Computer-adaptive testing: A methodology whose time has come}, booktitle = {Development of Computerised Middle School Achievement Tests}, volume = {69}, year = {2000}, publisher = {MESA}, organization = {MESA}, address = {Chicago, IL. USA}, keywords = {computerized adaptive testing}, author = {Linacre, J. M.}, editor = {Kang, U. and Jean, E. and Linacre, J. M.} } @booklet {1336, title = {Computerized adaptive rating scales (CARS): Development and evaluation of the concept}, year = {2000}, address = {(Institute Rep No. 350). Tampa FL: Personnel Decisions Research Institute.}, author = {Borman, W. C. and Hanson, M. A. and Kubisiak, U. C. and Buck, D. E.} } @inbook {1814, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, year = {2000}, address = {W. J. van der Linden, and C. A. W. Glas (Editors). Computerized Adaptive Testing: Theory and Practice. Norwell MA: Kluwer.}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @inbook {410, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, booktitle = {Computer adaptive testing: Theory and practice}, year = {2000}, pages = {201-219}, publisher = {Kluwer Academic.}, organization = {Kluwer Academic.}, address = {Dordrecht, The Netherlands}, keywords = {person Fit}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1543, title = {Detection of person misfit in computerized adaptive testing with polytomous items (Research Report 00-01)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. 
R.} } @booklet {1409, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (Research Report 2000-4)}, year = {2000}, address = {Iowa City IA: ACT Inc}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @booklet {1344, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (ACT Research 2000-4)}, year = {2000}, address = {Iowa City IA, ACT, Inc}, author = {Chang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @article {205, title = {Estimating Item Parameters from Classical Indices for Item Pool Development with a Computerized Classification Test. }, number = {Research Report 2000-4}, year = {2000}, institution = {ACT, Inc.}, address = {Iowa City, Iowa}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J. A.} } @article {657, title = {Item exposure control in computer-adaptive testing: The use of freezing to augment stratification}, journal = {Florida Journal of Educational Research}, volume = {40}, year = {2000}, pages = {28-52}, author = {Parshall, C. and Harmes, J. C. and Kromrey, J. D.} } @article {232, title = {Lagrangian relaxation for constrained curve-fitting with binary variables: Applications in educational testing}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {61}, number = {3-A}, year = {2000}, pages = {1063}, abstract = {This dissertation offers a mathematical programming approach to curve fitting with binary variables. Various Lagrangian Relaxation (LR) techniques are applied to constrained curve fitting. Applications in educational testing with respect to test assembly are utilized. In particular, techniques are applied to both static exams (i.e. conventional paper-and-pencil (P\&P)) and adaptive exams (i.e. a hybrid computerized adaptive test (CAT) called a multiple-forms structure (MFS)). This dissertation focuses on the development of mathematical models to represent these test assembly problems as constrained curve-fitting problems with binary variables and solution techniques for the test development. Mathematical programming techniques are used to generate parallel test forms with item characteristics based on item response theory. A binary variable is used to represent whether or not an item is present on a form. The problem of creating a test form is modeled as a network flow problem with additional constraints. In order to meet the target information and the test characteristic curves, a Lagrangian relaxation heuristic is applied to the problem. The Lagrangian approach works by multiplying the constraint by a "Lagrange multiplier" and adding it to the objective. By systematically varying the multiplier, the test form curves approach the targets. This dissertation explores modifications to Lagrangian Relaxation as it is applied to the classical paper-and-pencil exams. For the P\&P exams, LR techniques are also utilized to include additional practical constraints to the network problem, which limit the item selection. An MFS is a type of a computerized adaptive test. It is a hybrid of a standard CAT and a P\&P exam. The concept of an MFS will be introduced in this dissertation, as well as, the application of LR as it is applied to constructing parallel MFSs. The approach is applied to the Law School Admission Test for the assembly of the conventional P\&P test as well as an experimental computerized test using MFSs. 
(PsycINFO Database Record (c) 2005 APA )}, keywords = {Analysis, Educational Measurement, Mathematical Modeling, Statistical}, author = {Koppel, N. B.} } @article {603, title = {The null distribution of person-fit statistics for conventional and adaptive tests}, journal = {Applied Psychological Measurement}, volume = {23}, year = {2000}, pages = {327-345}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @article {2089, title = {Practical issues in developing and maintaining a computerized adaptive testing program}, journal = {Psicologica}, volume = {21}, year = {2000}, pages = {135-155}, author = {Wise, S. L. and Kingsbury, G. G.} } @article {791, title = {Response to Hays et al and McHorney and Cohen: Practical implications of item response theory and computerized adaptive testing: A brief summary of ongoing studies of widely used headache impact scales}, journal = {Medical Care}, volume = {38}, year = {2000}, pages = {73-82}, author = {Ware, J. E., Jr. and Bjorner, J. B. and Kosinski, M.} } @conference {1085, title = {Sufficient simplicity or comprehensive complexity? A comparison of probabilitic and stratification methods of exposure control}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Parshall, C. G. and Kromrey, J. D. and Hogarty, K. Y.} } @conference {963, title = {Test security and item exposure control for computer-based }, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Educatio}, year = {2000}, address = {Chicago}, author = {Kalohn, J.} } @booklet {1538, title = {Using response times to detect aberrant behavior in computerized adaptive testing (Research Report 00-09)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @conference {988, title = {Automated flawed item detection and graphical item used in on-line calibration of CAT-ASVAB. }, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Krass, I. A. and Thomasson, G. L.} } @conference {978, title = {A comparison of conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$KI99-1}, address = {Montreal, Canada}, author = {Kingsbury, G. G. and A Zara} } @conference {1082, title = {Computerized testing {\textendash} Issues and applications (Mini-course manual)}, booktitle = {Annual Meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal}, author = {Parshall, C. and Davey, T. and Spray, J. and Kalohn, J.} } @article {751, title = {CUSUM-based person-fit statistics for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, year = {1999}, pages = {199-218}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1542, title = {CUSUM-based person-fit statistics for adaptive testing (Research Report 99-05)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. 
and Meijer, R. R.} } @inbook {1809, title = {Developing computerized adaptive tests for school children}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 93-115). Mahwah NJ: Erlbaum.}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {790, title = {Dynamic health assessments: The search for more practical and more precise outcomes measures}, journal = {Quality of Life Newsletter}, year = {1999}, note = {{PDF file, 75 KB}}, pages = {11-13}, author = {Ware, J. E., Jr. and Bjorner, J. B. and Kosinski, M.} } @article {220, title = {The effect of model misspecification on classification decisions made using a computerized test}, journal = {Journal of Educational Measurement}, volume = {36}, number = {1}, year = {1999}, note = {National Council on Measurement in Education, US}, pages = {47-59}, abstract = {Many computerized testing algorithms require the fitting of some item response theory (IRT) model to examinees{\textquoteright} responses to facilitate item selection, the determination of test stopping rules, and classification decisions. Some IRT models are thought to be particularly useful for small volume certification programs that wish to make the transition to computerized adaptive testing (CAT). The 1-parameter logistic model (1-PLM) is usually assumed to require a smaller sample size than the 3-parameter logistic model (3-PLM) for item parameter calibrations. This study examined the effects of model misspecification on the precision of the decisions made using the sequential probability ratio test. For this comparison, the 1-PLM was used to estimate item parameters, even though the items{\textquoteright} characteristics were represented by a 3-PLM. Results demonstrate that the 1-PLM produced considerably more decision errors under simulation conditions similar to a real testing environment, compared to the true model and to a fixed-form standard reference set of items. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Kalohn, J.C. and Spray, J. A.} } @article {234, title = {Evaluating the usefulness of computerized adaptive testing for medical in-course assessment}, journal = {Academic Medicine}, volume = {74}, number = {10}, year = {1999}, note = {Kreiter, C DFerguson, KGruppen, L DUnited statesAcademic medicine : journal of the Association of American Medical CollegesAcad Med. 1999 Oct;74(10):1125-8.}, month = {Oct}, pages = {1125-8}, edition = {1999/10/28}, abstract = {PURPOSE: This study investigated the feasibility of converting an existing computer-administered, in-course internal medicine test to an adaptive format. METHOD: A 200-item internal medicine extended matching test was used for this research. Parameters were estimated with commercially available software with responses from 621 examinees. A specially developed simulation program was used to retrospectively estimate the efficiency of the computer-adaptive exam format. RESULTS: It was found that the average test length could be shortened by almost half with measurement precision approximately equal to that of the full 200-item paper-and-pencil test. However, computer-adaptive testing with this item bank provided little advantage for examinees at the upper end of the ability continuum. An examination of classical item statistics and IRT item statistics suggested that adding more difficult items might extend the advantage to this group of examinees. 
CONCLUSIONS: Medical item banks presently used for incourse assessment might be advantageously employed in adaptive testing. However, it is important to evaluate the match between the items and the measurement objective of the test before implementing this format.}, keywords = {*Automation, *Education, Medical, Undergraduate, Educational Measurement/*methods, Humans, Internal Medicine/*education, Likelihood Functions, Psychometrics/*methods, Reproducibility of Results}, isbn = {1040-2446 (Print)}, author = {Kreiter, C. D. and Ferguson, K. and Gruppen, L. D.} } @conference {1004, title = {Formula score and direct optimization algorithms in CAT ASVAB on-line calibration}, booktitle = {Paper presented at the annual meeting of the *?*.}, year = {1999}, author = {Levine, M. V. and Krass, I. A.} } @conference {1083, title = {Item exposure in adaptive tests: An empirical investigation of control strategies}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1999}, address = {Lawrence KS}, author = {Parshall, C. and Hogarty, K. and Kromrey, J.} } @article {752, title = {The null distribution of person-fit statistics for conventional and adaptive tests}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {327-345}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {979, title = {A procedure to compare conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Kingsbury, G. G. and A Zara} } @conference {971, title = {Standard errors of proficiency estimates in stratum scored CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Kingsbury, G. G.} } @article {230, title = {Threats to score comparability with applications to performance assessments and computerized adaptive tests}, journal = {Educational Assessment}, volume = {6}, number = {2}, year = {1999}, pages = {73-96}, abstract = {Develops a conceptual framework that addresses score comparability for performance assessments, adaptive tests, paper-and-pencil tests, and alternate item pools for computerized tests. Outlines testing situation aspects that might threaten score comparability and describes procedures for evaluating the degree of score comparability. Suggests ways to minimize threats to comparability. (SLD)}, author = {Kolen, M. J.} } @article {599, title = {Threats to score comparability with applications to performance assessments and computerized adaptive tests}, journal = {Educational Assessment}, volume = {6}, year = {1999}, pages = {73-96}, author = {Kolen, M. J.} } @article {419, title = {The use of Rasch analysis to produce scale-free measurement of functional ability}, journal = {American Journal of Occupational Therapy}, volume = {53}, number = {1}, year = {1999}, note = {991250470272-9490Journal Article}, pages = {83-90}, abstract = {Innovative applications of Rasch analysis can lead to solutions for traditional measurement problems and can produce new assessment applications in occupational therapy and health care practice. First, Rasch analysis is a mechanism that translates scores across similar functional ability assessments, thus enabling the comparison of functional ability outcomes measured by different instruments. 
This will allow for the meaningful tracking of functional ability outcomes across the continuum of care. Second, once the item-difficulty order of an instrument or item bank is established by Rasch analysis, computerized adaptive testing can be used to target items to the patient{\textquoteright}s ability level, reducing assessment length by as much as one half. More importantly, Rasch analysis can provide the foundation for "equiprecise" measurement or the potential to have precise measurement across all levels of functional ability. The use of Rasch analysis to create scale-free measurement of functional ability demonstrates how this methodology can be used in practical applications of clinical and outcome assessment.}, keywords = {*Activities of Daily Living, Disabled Persons/*classification, Human, Occupational Therapy/*methods, Predictive Value of Tests, Questionnaires/standards, Sensitivity and Specificity}, author = {Velozo, C. A. and Kielhofner, G. and Lai, J-S.} } @conference {985, title = {Application of direct optimization for on-line calibration in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, note = {{PDF file, 146 KB}}, address = {San Diego CA}, author = {Krass, I. A.} } @conference {864, title = {Computerized adaptive rating scales that measure contextual performance}, booktitle = {Paper presented at the 13th annual conference of the Society for Industrial and Organizational Psychology}, year = {1998}, address = {Dallas TX}, author = {Borman, W. C. and Hanson, M. A. and Motowidlo, S. J. and F Drasgow and Foster, L. and Kubisiak, U. C.} } @conference {962, title = {Effect of item selection on item exposure rates within a computerized classification test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {Kalohn, J.C. and Spray, J. A.} } @booklet {1541, title = {Person fit based on statistical process control in an adaptive testing environment (Research Report 98-13)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1514, title = {The relationship between computer familiarity and performance on computer-based TOEFL test tasks (Research Report 98-08)}, year = {1998}, address = {Princeton NJ: Educational Testing Service}, author = {Taylor, C. and Jamieson, J. and Eignor, D. R. and Kirsch, I.} } @booklet {1475, title = {Simulating the null distribution of person-fit statistics for conventional and adaptive tests (Research Report 98-02)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @booklet {1385, title = {Statistical tests for person misfit in computerized adaptive testing (Research Report 98-01)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Glas, C. A. W. and Meijer, R. R. and van Krimpen-Stoop, E. M. L. 
A.} } @article {155, title = {Statistical tests for person misfit in computerized adaptive testing}, number = {98-01}, year = {1998}, pages = {28}, institution = {Faculty of Educational Science and Technology, Univeersity of Twente}, address = {Enschede, The Netherlands}, isbn = {98-01}, author = {Glas, C. A. W. and Meijer, R. R. and van Krimpen-Stoop, E. M.} } @conference {1241, title = {Evaluating comparability in computerized adaptive testing: A theoretical framework with an example}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, address = {Chicago}, author = {Wang, T. and Kolen, M. J.} } @conference {984, title = {Getting more precision on computer adaptive testing}, booktitle = {Paper presented at the 62nd Annual meeting of Psychometric Society}, year = {1997}, address = {University of Tennessee, Knoxville, TN}, author = {Krass, I. A.} } @conference {969, title = {Item pool development and maintenance}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Kingsbury, G. G.} } @conference {970, title = {Some questions that must be addressed to develop and maintain an item pool for use in an adaptive test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Kingsbury, G. G.} } @inbook {1892, title = {Validation of the experimental CAT-ASVAB system}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation. Washington, DC: American Psychological Association.}, author = {Segall, D. O. and Moreno, K. E. and Kieckhaefer, W. F. and Vicino, F. L. and J. R. McBride} } @conference {968, title = {Item review and adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Kingsbury, G. G.} } @article {104, title = {Computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {19}, year = {1995}, pages = {5{\textendash}22.}, abstract = {Discusses polytomous item response theory models and the research that has been conducted to investigate a variety of possible operational procedures (item bank, item selection, trait estimation, stopping rule) for polytomous model-based computerized adaptive testing (PCAT). Studies are reviewed that compared PCAT systems based on competing item response theory models that are appropriate for the same measurement objective, as well as applications of PCAT in marketing and educational psychology. Directions for future research using PCAT are suggested.}, author = {Dodd, B. G. and De Ayala, R. J., and Koch, W. R.} } @article {2031, title = {Computerized Adaptive Testing With Polytomous Items}, journal = {Applied Psychological Measurement}, volume = {19}, year = {1995}, pages = {5-22}, author = {Dodd, B. G. and De Ayala, R. J. and Koch. W.R.,} } @conference {1174, title = {The effect of model misspecification on classification decisions made using a computerized test: 3-PLM vs. 1PLM (and UIRT versus MIRT)}, booktitle = {Paper presented at the Annual Meeting of the Psychometric Society}, year = {1995}, note = {$\#$SP95-01}, address = {Minneapolis, MN}, author = {Spray, J. A. and Kalohn, J.C. and Schulz, M. and Fleer, P. 
Jr.} } @conference {966, title = {The influence of examinee test-taking behavior motivation in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, note = {(ERIC No. ED392839)}, address = {San Francisco CA}, author = {Kim, J. and McLean, J. E.} } @article {229, title = {An investigation of procedures for computerized adaptive testing using the successive intervals Rasch model}, journal = {Educational and Psychological Measurement}, volume = {55}, number = {6}, year = {1995}, pages = {976-990.}, author = {Koch, W. R. and Dodd, B. G.} } @inbook {1757, title = {Prerequisite relationships for the adaptive assessment of knowledge}, year = {1995}, address = {Greer, J. (Ed.) Proceedings of AIED{\textquoteright}95, 7th World Conference on Artificial Intelligence in Education, Washington, DC, AACE 43-50.}, author = {Dowling, C. E. and Kaluscha, R.} } @article {221, title = {Monte Carlo simulation comparison of two-stage testing and computerized adaptive testing}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {54}, number = {7-A}, year = {1994}, pages = {2548}, keywords = {computerized adaptive testing}, author = {Kim, H-O.} } @article {224, title = {Assessing the utility of item response models: computerized adaptive testing}, journal = {Educational Measurement: Issues and Practice}, volume = {12}, number = {1}, year = {1993}, pages = {21-27}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {106, title = {Computerized adaptive testing using the partial credit model: Effects of item pool characteristics and different stopping rules}, journal = {Educational and Psychological Measurement}, volume = {53}, number = {1}, year = {1993}, pages = {61-77.}, abstract = {Simulated datasets were used to research the effects of the systematic variation of three major variables on the performance of computerized adaptive testing (CAT) procedures for the partial credit model. The three variables studied were the stopping rule for terminating the CATs, item pool size, and the distribution of the difficulty of the items in the pool. Results indicated that the standard error stopping rule performed better across the variety of CAT conditions than the minimum information stopping rule. In addition it was found that item pools that consisted of as few as 30 items were adequate for CAT provided that the item pool was of medium difficulty. The implications of these findings for implementing CAT systems based on the partial credit model are discussed. }, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @conference {965, title = {Individual differences in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Mid-South Educational Research Association}, year = {1993}, address = {New Orleans LA}, author = {Kim, J.} } @conference {1269, title = {An investigation of restricted self-adapted testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Wise, S. L. and Kingsbury, G. G. and Houser, R.L.} } @conference {967, title = {Monte Carlo simulation comparison of two-stage testing and computerized adaptive testing}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta, GA}, author = {Kim, H. 
and Plake, B. S.} } @conference {976, title = {A practical examination of the use of free-response questions in computerized adaptive testing}, booktitle = {Paper presented to the annual meeting of the American Educational Research Association: Atlanta GA.}, year = {1993}, note = {{PDF file, 30 KB}}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {529, title = {A comparison of the partial credit and graded response models in computerized adaptive testing}, journal = {Applied Measurement in Education}, volume = {5}, year = {1992}, pages = {17-34}, author = {De Ayala, R. J. and Dodd, B. G. and Koch, W. R.} } @article {778, title = {A comparison of the performance of simulated hierarchical and linear testlets}, journal = {Journal of Educational Measurement}, volume = {29}, year = {1992}, pages = {243-251}, author = {Wainer, H., and Kaplan, B. and Lewis, C.} } @conference {981, title = {Estimation of ability level by using only observable quantities in adaptive testing}, booktitle = {Paper presented at the annual meeting if the American Educational Research Association}, year = {1992}, address = {Chicago}, author = {Kirisci, L.} } @conference {947, title = {Scaling of two-stage adaptive test configurations for achievement testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1992}, address = {New Orleans LA}, author = {Hendrickson, A. B. and Kolen, M. J.} } @article {781, title = {Building algebra testlets: A comparison of hierarchical and linear structures}, journal = {Journal of Educational Measurement}, volume = {8}, year = {1991}, pages = {xxx-xxx}, author = {Wainer, H., and Lewis, C. and Kaplan, B. and Braswell, J.} } @article {235, title = {A comparison of paper-and-pencil, computer-administered, computerized feedback, and computerized adaptive testing methods for classroom achievement testing}, journal = {Dissertation Abstracts International}, volume = {52}, number = {5-A}, year = {1991}, pages = {1719}, keywords = {computerized adaptive testing}, author = {Kuan, Tsung Hao} } @article {593, title = {A comparison of procedures for content-sensitive item selection}, journal = {Applied Measurement in Education}, year = {1991}, author = {Kingsbury, G. G.} } @article {596, title = {A comparison of procedures for content-sensitive item selection in computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {4}, year = {1991}, pages = {241-261}, author = {Kingsbury, G. G. and A Zara} } @booklet {1554, title = {Some empirical guidelines for building testlets (Technical Report 91-56)}, year = {1991}, address = {Princeton NJ: Educational Testing Service, Program Statistics Research}, author = {Wainer, H., and Kaplan, B. and Lewis, C.} } @article {592, title = {Adapting adaptive testing: Using the MicroCAT Testing System in a local school district}, journal = {Educational Measurement: Issues and Practice}, volume = {29 (2)}, year = {1990}, pages = {3-6}, author = {Kingsbury, G. G.} } @booklet {1556, title = {An adaptive algebra test: A testlet-based, hierarchically structured test with validity-based scoring}, year = {1990}, note = {Princeton NJ: Educational testing Service.}, address = {ETS Technical Report 90-92}, author = {Wainer, H., and Lewis, C. 
and Kaplan, B. and Braswell, J.} } @conference {975, title = {Assessing the utility of item response models: Computerized adaptive testing}, booktitle = {A paper presented to the annual meeting of the National Council on Measurement in Education}, year = {1990}, address = {Boston MA}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {598, title = {Computerized adaptive measurement of attitudes}, journal = {Measurement and Evaluation in Counseling and Development}, volume = {23}, year = {1990}, pages = {20-30}, author = {Koch, W. R. and Dodd, B. G. and Fitzpatrick, S. J.} } @article {98, title = {A simulation and comparison of flexilevel and Bayesian computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {27}, number = {3}, year = {1990}, pages = {227-239}, abstract = {Computerized adaptive testing (CAT) is a testing procedure that adapts an examination to an examinee{\textquoteright}s ability by administering only items of appropriate difficulty for the examinee. In this study, the authors compared Lord{\textquoteright}s flexilevel testing procedure (flexilevel CAT) with an item response theory-based CAT using Bayesian estimation of ability (Bayesian CAT). Three flexilevel CATs, which differed in test length (36, 18, and 11 items), and three Bayesian CATs were simulated; the Bayesian CATs differed from one another in the standard error of estimate (SEE) used for terminating the test (0.25, 0.10, and 0.05). Results showed that the flexilevel 36- and 18-item CATs produced ability estimates that may be considered as accurate as those of the Bayesian CAT with SEE = 0.10 and comparable to the Bayesian CAT with SEE = 0.05. The authors discuss the implications for classroom testing and for item response theory-based CAT.}, keywords = {computerized adaptive testing}, author = {De Ayala, R. J., and Dodd, B. G. and Koch, W. R.} } @article {2019, title = {Adaptive and Conventional Versions of the DAT: The First Complete Test Battery Comparison}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {363-371}, author = {Henly, S. J. and Klebe, K. J. and J. R. McBride and Cudeck, R.} } @article {569, title = {Adaptive and conventional versions of the DAT: The first complete test battery comparison}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {363-371}, author = {Henly, S. J. and Klebe, K. J. and J. R. McBride and Cudeck, R.} } @conference {974, title = {Assessing the impact of using item parameter estimates obtained from paper-and-pencil testing for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1989}, note = {$\#$KI89-01}, address = {San Francisco}, author = {Kingsbury, G. G. and Houser, R.L.} } @inbook {1816, title = {Die Optimierung der Meßgenauigkeit beim branched adaptiven Testen [Optimization of measurement precision for branched-adaptive testing]}, year = {1989}, address = {K. D. Kubinger (Ed.), Moderne Testtheorie: Ein Abriß samt neuesten Beiträgen [Modern test theory: Overview and new issues] (pp. 187-218). Weinheim, Germany: Beltz.}, author = {Kubinger, K. D.} } @article {228, title = {An investigation of procedures for computerized adaptive testing using partial credit scoring}, journal = {Applied Measurement in Education}, volume = {2}, number = {4}, year = {1989}, pages = {335-357}, author = {Koch, W. R. and Dodd, B. 
G.} } @article {532, title = {Operational characteristics of adaptive testing procedures using the graded response model}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {129-143}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @article {2017, title = {Operational Characteristics of Adaptive Testing Procedures Using the Graded Response Model}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {129-143}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.} } @article {595, title = {Procedures for selecting items for computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {2}, year = {1989}, pages = {359-375}, author = {Kingsbury, G. G. and A Zara} } @article {588, title = {Tailored interviewing: An application of item response theory for personality measurement}, journal = {Journal of Personality Assessment}, volume = {53}, year = {1989}, pages = {502-519}, author = {Kamakura, W. A., and Balasubramanian, S. K.} } @article {796, title = {Assessment of academic skills of learning disabled students with classroom microcomputers}, journal = {School Psychology Review}, volume = {17}, year = {1988}, pages = {81-88}, author = {Watkins, M. W. and Kush, J. C.} } @conference {973, title = {A comparison of achievement level estimates from computerized adaptive testing and paper-and-pencil testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, note = {{PDF file, 43 KB}}, address = {New Orleans LA}, author = {Kingsbury, G. G. and Houser, R.L.} } @conference {909, title = {Computerized adaptive attitude measurement: A comparison of the graded response and rating scale models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, address = {New Orleans}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @article {591, title = {Computerized adaptive testing: A four-year-old pilot study shows that CAT can work}, journal = {Technological Horizons in Education}, volume = {16 (4)}, year = {1988}, pages = {73-76}, author = {Kingsbury, G. G. and et. al.} } @conference {980, title = {A predictive analysis approach to adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, note = {ERIC No. ED295982).}, address = {New Orleans LA}, author = {Kirisci, L. and Hsu, T.-C.} } @inbook {1815, title = {On a Rasch-model-based test for non-computerized adaptive testing}, year = {1988}, address = {Langeheine, R. and Rost, J. (Ed.), Latent trait and latent class models. New York: Plenum Press.}, author = {Kubinger, K. D.} } @article {779, title = {CATS, testlets, and test construction: A rationale for putting test developers back into CAT}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1987}, note = {(volume number appears to incorrect)}, pages = {185-202}, author = {Wainer, H., and Kiely, G. L.} } @conference {899, title = {Computerized adaptive testing: A comparison of the nominal response model and the three-parameter logistic model}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1987}, address = {Washington DC}, author = {De Ayala, R. J., and Koch, W. 
R.} } @booklet {1586, title = {Functional and design specifications for the National Council of State Boards of Nursing adaptive testing system}, year = {1987}, address = {Unpublished manuscript}, author = {A Zara and Bosma, J. and Kaplan, R.} } @article {2134, title = {Item clusters and computerized adaptive testing: A case for testlets}, journal = {Journal of Educational Measurement}, volume = {24}, year = {1987}, pages = {185-201}, author = {Wainer, H., and Kiely, G. L.} } @booklet {1555, title = {CATs, testlets, and test construction: A rationale for putting test developers back into CAT (Technical Report 86-71)}, year = {1986}, note = {$\#$WA86-71}, address = {Princeton NJ: Educational Testing Service, Program Statistics Research}, author = {Wainer, H., and Kiely, G. L.} } @booklet {1561, title = {College Board computerized placement tests: Validation of an adaptive test of basic skills (Research Report 86-29)}, year = {1986}, address = {Princeton NJ: Educational Testing Service.}, author = {W. C. Ward and Kline, R. G. and Flaugher, J.} } @inbook {1807, title = {Computerized adaptive testing: A pilot project}, year = {1986}, address = {W. C. Ryan (ed.), Proceedings: NECC 86, National Educational Computing Conference (pp.172-176). Eugene OR: University of Oregon, International Council on Computers in Education.}, author = {Kingsbury, G. G.} } @conference {983, title = {Operational characteristics of adaptive testing procedures using partial credit scoring}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1986}, note = {$\#$KO86-01}, address = {San Francisco CA}, author = {Koch, W. R. and Dodd. B. G.} } @article {222, title = {Adaptive self-referenced testing as a procedure for the measurement of individual change due to instruction: A comparison of the reliabilities of change estimates obtained from conventional and adaptive testing procedures}, journal = {Dissertation Abstracts International}, volume = {45}, number = {9-B}, year = {1985}, pages = {3057}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G.} } @article {528, title = {ALPHATAB: A lookup table for Bayesian computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {9}, year = {1985}, pages = {326}, author = {De Ayala, R. J., and Koch, W. R.} } @conference {982, title = {Computerized adaptive attitude measurement}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1985}, address = {Chicago}, author = {Koch, W. R. and Dodd, B. G.} } @article {43, title = {Current developments and future directions in computerized personality assessment}, journal = {Journal of Consulting and Clinical Psychology}, volume = {53}, number = {6}, year = {1985}, note = {Miscellaneous Article}, pages = {803-815}, abstract = {Although computer applications in personality assessment have burgeoned rapidly in recent years, the majority of these uses capitalize on the computer{\textquoteright}s speed, accuracy, and memory capacity rather than its potential for the development of new, flexible assessment strategies. A review of current examples of computer usage in personality assessment reveals wide acceptance of automated clerical tasks such as test scoring and even test administration. The computer is also assuming tasks previously reserved for expert clinicians, such as writing narrative interpretive reports from test results. 
All of these functions represent automation of established assessment devices and interpretive strategies. The possibility also exists of harnessing some of the computer{\textquoteright}s unique adaptive capabilities to alter standard devices and even develop new ones. Three proposed strategies for developing computerized adaptive personality tests are described, with the conclusion that the computer{\textquoteright}s potential in this area justifies a call for further research efforts., (C) 1985 by the American Psychological Association}, author = {Butcher, J. N. and Keller, L. S. and Bacon, S. F.} } @conference {1067, title = {A validity study of the computerized adaptive testing version of the Armed Services Vocational Aptitude Battery}, booktitle = {Proceedings of the 27th Annual Conference of the Military Testing Association}, year = {1985}, author = {Moreno, K. E. and Segall, D. O. and Kieckhaefer, W. F.} } @book {1678, title = {Adaptive self-referenced testing as a procedure for the measurement of individual change in instruction: A comparison of the reliabilities of change estimates obtained from conventional and adaptive testing procedures}, year = {1984}, address = {Unpublished doctoral dissertation, Univerity of Minnesota, Minneapolis}, author = {Kingsbury, G. G.} } @article {2015, title = {Item Location Effects and Their Implications for IRT Equating and Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {8}, year = {1984}, pages = {147-154}, author = {Kingston, N. M. and Dorans, N. J.} } @booklet {1418, title = {Alternate forms reliability and concurrent validity of adaptive and conventional tests with military recruits}, year = {1983}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Kiely, G. L. and A Zara and Weiss, D. J.} } @inbook {1950, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait theory and computerized adaptive testing (pp. 1-8). New York: Academic Press.}, author = {Kingsbury, G.G. and Weiss, D. J.} } @inbook {226, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure.}, booktitle = {New horizons in testing: Latent trait test theory and computerized adaptive testing}, year = {1983}, pages = {258-283}, publisher = {Academic Press.}, organization = {Academic Press.}, address = {New York, NY. USA}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1812, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait test theory and computerized adaptive testing (pp. 257-283). New York: Academic Press.}, author = {Kingsbury, G. G. and Weiss, D. J.} } @booklet {1423, title = {A validity comparison of adaptive and conventional strategies for mastery testing (Research Report 81-3)}, year = {1981}, note = {{PDF file, 1.855 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. 
J.} } @booklet {1422, title = {An alternate-forms reliability and concurrent validity comparison of Bayesian adaptive and conventional ability tests (Research Report 80-5)}, year = {1980}, note = {{PDF file, 1.11 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @booklet {1421, title = {A comparison of adaptive, sequential, and conventional testing strategies for mastery decisions (Research Report 80-4)}, year = {1980}, note = {{PDF file, 1.905 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1811, title = {A comparison of ICC-based adaptive mastery testing and the Waldian probability ratio method}, year = {1980}, note = {51 MB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 120-139). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @booklet {1417, title = {Computerized instructional adaptive testing model: Formulation and validation (AFHRL-TR-79-33, Final Report)}, year = {1980}, address = {Brooks Air Force Base TX: Air Force Human Resources Laboratory", Also Catalog of Selected Documents in Psychology, February 1981, 11, 20 (Ms. No, 2217) }, author = {Kalisch, S. J.} } @booklet {1427, title = {An empirical study of a broad range test of verbal ability}, year = {1980}, address = {Princeton NJ: Educational Testing Service}, author = {Kreitzberg, C. B. and Jones, D. J.} } @inbook {1804, title = {A model for computerized adaptive testing related to instructional situations}, year = {1980}, note = {{PDF file, 965 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 101-119). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Kalisch, S. J.} } @booklet {1420, title = {An adaptive testing strategy for mastery decisions (Research Report 79-5)}, year = {1979}, note = {{PDF file, 2.146 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Kingsbury, G. G. and Weiss, D. J.} } @booklet {1425, title = {Problems in application of latent-trait models to tailored testing (Research Report 79-1)}, year = {1979}, address = {Columbia MO: University of Missouri, Department of Psychology", (also presented at National Council on Measurement in Education, 1979: ERIC No. ED 177 196) note = "}, author = {Koch, W. J. and Reckase, M. D.} } @book {1679, title = {The Rasch model in computerized personality testing}, year = {1979}, address = {Ph.D. dissertation, University of Missouri, Columbia, 1979}, author = {Kunce, C. S.} } @inbook {1765, title = {Applications of sequential testing procedures to performance testing}, year = {1978}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Epstein, K. I. and Knerr, C. 
S.} } @article {601, title = {Computerized adaptive testing: Principles and directions}, journal = {Computers and Education}, volume = {2 (4)}, year = {1978}, pages = {319-329}, author = {Kreitzberg, C. B.} } @article {602, title = {Computerized adaptive testing: Principles and directions}, journal = {Computers and Education}, volume = {2}, year = {1978}, pages = {319-329}, author = {Kreitzberg, C. B. and Stocking, M. and Swanson, L.} } @booklet {1424, title = {A live tailored testing comparison study of the one- and three-parameter logistic models (Research Report 78-1)}, year = {1978}, address = {Columbia MO: University of Missouri, Department of Psychology}, author = {Koch, W. J. and Reckase, M. D.} } @proceedings {120, title = {Applications of sequential testing procedures to performance testing}, journal = {1977 Computerized Adaptive Testing Conference}, year = {1977}, publisher = {University of Minnesota}, address = {Minneapolis, MN. USA}, author = {Epstein, K. I. and Knerr, C. S.} } @booklet {1327, title = {Calibration of an item pool for the adaptive measurement of achievement (Research Report 77-5)}, year = {1977}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Bejar, I. I. and Weiss, D. J. and Kingsbury, G. G.} } @conference {2223, title = {Real-data simulation of a proposal for tailored testing}, booktitle = {Third International Conference on Educational Testing}, year = {1977}, month = {06/1977}, address = {Leyden, The Netherlands}, author = {Killcross, M. C.} } @inbook {1813, title = {Student attitudes toward tailored testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Koch, W. R. and Patience, W. M.} } @article {519, title = {TAILOR: A FORTRAN procedure for interactive tailored testing}, journal = {Educational and Psychological Measurement}, volume = {37}, year = {1977}, pages = {767-769}, author = {Cudeck, R. A. and Cliff, N. A. and Kehoe, J.} } @booklet {1419, title = {A review of research in tailored testing (Report APRE No. 9/76)}, year = {1976}, address = {Farnborough, Hants, U. K.: Ministry of Defence, Army Personnel Research Establishment}, author = {Killcross, M. C.} } @book {1677, title = {The comparison of two tailored testing models and the effects of the models variables on actual loss}, year = {1974}, address = {Unpublished doctoral dissertation, Florida State University}, author = {Kalisch, S. J.} } @conference {2225, title = {An empirical investigation of the stability and accuracy of flexilevel tests}, booktitle = {Annual meeting of the National Council on Measurement in Education}, year = {1974}, month = {03/1974}, address = {Chicago IL}, author = {Kocher, A.T.} } @article {587, title = {A tailored testing model employing the beta distribution and conditional difficulties}, journal = {Journal of Computer-Based Instruction}, volume = {1}, year = {1974}, pages = {22-28}, author = {Kalisch, S. J.} } @booklet {1416, title = {A tailored testing model employing the beta distribution (unpublished manuscript)}, year = {1974}, address = {Florida State University, Educational Evaluation and Research Design Program}, author = {Kalisch, S. 
J.} } @conference {964, title = {A tailored testing system for selection and allocation in the British Army}, booktitle = {Paper presented at the 18th International Congress of Applied Psychology}, year = {1974}, address = {Montreal, Canada}, author = {Killcross, M. C.} } @conference {2224, title = {The potential use of tailored testing for allocation to army employments}, booktitle = {NATO Conference on Utilisation of Human Resources}, year = {1973}, month = {06/1973}, address = {Lisbon, Portugal}, author = {Killcross, M. C. and Cassie, A.} } @article {586, title = {A tailored testing model employing the beta distribution and conditional difficulties}, journal = {Journal of Computer-Based Instruction}, volume = {1}, year = {1973}, pages = {111-120}, author = {Kalisch, S. J.} } @article {589, title = {Use of an on-line computer for psychological testing with the up-and-down method}, journal = {American Psychologist}, volume = {24}, year = {1969}, pages = {207-211}, author = {Kappauf, W. E.} } @booklet {1426, title = {Progress report on the sequential item test}, year = {1959}, address = {East Lansing MI: Michigan State University, Bureau of Educational Research}, author = {Krathwohl, D.} } @article {600, title = {The sequential item test}, journal = {American Psychologist}, volume = {11}, year = {1956}, pages = {419}, author = {Krathwohl, D. R. and Huyser, R. J.} }