@article {2617, title = {Development of a Computerized Adaptive Test for Anxiety Based on the Dutch{\textendash}Flemish Version of the PROMIS Item Bank}, journal = {Assessment}, year = {In Press}, abstract = {We used the Dutch{\textendash}Flemish version of the USA PROMIS adult V1.0 item bank for Anxiety as input for developing a computerized adaptive test (CAT) to measure the entire latent anxiety continuum. First, psychometric analysis of a combined clinical and general population sample (N = 2,010) showed that the 29-item bank has psychometric properties that are required for a CAT administration. Second, a post hoc CAT simulation showed efficient and highly precise measurement, with an average number of 8.64 items for the clinical sample, and 9.48 items for the general population sample. Furthermore, the accuracy of our CAT version was highly similar to that of the full item bank administration, both in final score estimates and in distinguishing clinical subjects from persons without a mental health disorder. We discuss the future directions and limitations of CAT development with the Dutch{\textendash}Flemish version of the PROMIS Anxiety item bank.}, doi = {10.1177/1073191117746742}, url = {https://doi.org/10.1177/1073191117746742}, author = {Gerard Flens and Niels Smits and Caroline B. Terwee and Joost Dekker and Irma Huijbrechts and Philip Spinhoven and Edwin de Beurs} } @article {2611, title = {Measurement efficiency for fixed-precision multidimensional computerized adaptive tests: Comparing health measurement and educational testing using example banks}, journal = {Applied Psychological Measurement}, year = {In Press}, author = {Paap, Muirne C. S. and Born, Sebastian and Braeken, Johan} } @article {2043, title = {Optimizing cognitive ability measurement with multidimensional computer adaptive testing}, journal = {International Journal of Testing}, year = {In Press}, author = {Makransky, G. and Glas, C. A. W.} } @article {2757, title = {The Influence of Computerized Adaptive Testing on Psychometric Theory and Practice}, journal = {Journal of Computerized Adaptive Testing}, volume = {11}, year = {2024}, abstract = {

The major premise of this article is that part of the stimulus for the evolution of psychometric theory since the 1950s was the introduction of the concept of computerized adaptive testing (CAT) or its earlier non-CAT variations. The conceptual underpinning of CAT that had the most influence on psychometric theory was the shift of emphasis from the test (or test score) as the focus of analysis to the test item (or item score). The change in focus allowed a change in the way that test results are conceived of as measurements. It also resolved the conflict among a number of ideas that were present in the early work on psychometric theory. Some of the conflicting ideas are summarized below to show how work on the development of CAT resolved some of those conflicts.

}, keywords = {computerized adaptive testing, Item Response Theory, paradigm shift, scaling theory, test design}, issn = {2165-6592}, doi = {10.7333/2403-1101001}, url = {https://jcatpub.net/index.php/jcat/issue/view/34/9}, author = {Reckase, Mark D.} } @article {2753, title = {Expanding the Meaning of Adaptive Testing to Enhance Validity}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, pages = {22-31}, keywords = {Adaptive Testing, CAT, CBT, test-taking disengagement, validity}, doi = {10.7333/2305-1002022}, author = {Steven L. Wise} } @article {2752, title = {An Extended Taxonomy of Variants of Computerized Adaptive Testing}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, keywords = {Adaptive Testing, evidence-centered design, Item Response Theory, knowledge-based model construction, missingness}, issn = {2165-6592}, doi = {10.7333/2302-100101}, author = {Roy Levy and John T. Behrens and Robert J. Mislevy} } @article {2754, title = {How Do Trait Change Patterns Affect the Performance of Adaptive Measurement of Change?}, journal = {Journal of Computerized Adaptive Testing}, volume = {10}, year = {2023}, pages = {32-58}, keywords = {adaptive measurement of change, computerized adaptive testing, longitudinal measurement, trait change patterns}, doi = {10.7333/2307-1003032}, author = {Ming Him Tai and Allison W. Cooperman and Joseph N. DeWeese and David J. Weiss} } @article {2750, title = {Improving Precision of CAT Measures}, journal = {Journal of Computerized Adaptive Testing}, volume = {9}, year = {2022}, month = {10/2022}, pages = {1-7}, keywords = {dichotomously scored items, option probability theory, scoring methods, subjective probability}, issn = {2165-6592}, doi = {10.7333/2210-0901001}, author = {John J. Barnard} } @article {2751, title = {The (non)Impact of Misfitting Items in Computerized Adaptive Testing}, journal = {Journal of Computerized Adaptive Testing}, volume = {9}, year = {2022}, keywords = {computerized adaptive testing, item fit, three-parameter logistic model}, doi = {10.7333/2211-0902008}, url = {https://jcatpub.net/index.php/jcat/issue/view/26}, author = {Christine E. DeMars} } @article {2730, title = {A Blocked-CAT Procedure for CD-CAT}, journal = {Applied Psychological Measurement}, volume = {44}, number = {1}, year = {2020}, pages = {49-64}, abstract = {This article introduces a blocked-design procedure for cognitive diagnosis computerized adaptive testing (CD-CAT), which allows examinees to review items and change their answers during test administration. Four blocking versions of the new procedure were proposed. In addition, the impact of several factors, namely, item quality, generating model, block size, and test length, on the classification rates was investigated. Three popular item selection indices in CD-CAT were used and their efficiency compared using the new procedure. An additional study was carried out to examine the potential benefit of item review. The results showed that the new procedure is promising in that allowing item review resulted only in a small loss in attribute classification accuracy under some conditions.
Moreover, using a blocked-design CD-CAT is beneficial to the extent that it alleviates the negative impact of test anxiety on examinees{\textquoteright} true performance.}, doi = {10.1177/0146621619835500}, url = {https://doi.org/10.1177/0146621619835500}, author = {Mehmet Kaplan and Jimmy de la Torre} } @article {2721, title = {Computerized adaptive testing to screen children for emotional and behavioral problems by preventive child healthcare}, journal = {BMC Pediatrics}, volume = {20}, year = {2020}, abstract = {

Background

Questionnaires to detect emotional and behavioral problems (EBP) in Preventive Child Healthcare (PCH) should be short, which potentially affects validity and reliability. Simulation studies have shown that Computerized Adaptive Testing (CAT) could overcome these weaknesses. We studied the applicability (using participation rate, satisfaction, and efficiency as measures) and the validity of CAT in routine PCH practice.

Methods

We analyzed data on 461 children aged 10{\textendash}11 years (response 41\%), who were assessed during routine well-child examinations by PCH professionals. Before the visit, parents completed the CAT and the Child Behavior Checklist (CBCL). Satisfaction was measured by parent- and PCH professional-report. Efficiency of the CAT procedure was measured as the number of items needed to assess whether a child has serious problems or not. Its validity was assessed using the CBCL as the criterion.

Results

Parents and PCH professionals rated the CAT on average as good. The procedure required on average 16 items to assess whether a child has serious problems or not. Agreement of scores on the CAT scales with the corresponding CBCL scales was high (range of Spearman correlations 0.59{\textendash}0.72). Areas under the curve (AUCs) were high (range: 0.95{\textendash}0.97) for the Psycat total, externalizing, and hyperactivity scales, using the corresponding CBCL scale scores as criterion. For the Psycat internalizing scale, the AUC was somewhat lower but still high (0.86).

Conclusions

CAT is a valid procedure for the identification of emotional and behavioral problems in children aged 10{\textendash}11 years. It may support the efficient and accurate identification of children with overall, and potentially also specific, emotional and behavioral problems in routine PCH.

}, url = {https://bmcpediatr.biomedcentral.com/articles/10.1186/s12887-020-2018-1}, author = {Theunissen, Meninou H.C. and de Wolff, Marianne S. and Deurloo, Jacqueline A. and Vogels, Anton G. C.} } @article {2732, title = {A Dynamic Stratification Method for Improving Trait Estimation in Computerized Adaptive Testing Under Item Exposure Control}, journal = {Applied Psychological Measurement}, volume = {44}, number = {3}, year = {2020}, pages = {182-196}, abstract = {When computerized adaptive testing (CAT) is under stringent item exposure control, the precision of trait estimation will substantially decrease. A new item selection method, the dynamic Stratification method based on Dominance Curves (SDC), which is aimed at improving trait estimation, is proposed to mitigate this problem. The objective function of the SDC in item selection is to maximize the sum of test information for all examinees rather than maximizing item information for individual examinees at a single-item administration, as in conventional CAT. To achieve this objective, the SDC uses dominance curves to stratify an item pool into strata with the number being equal to the test length to precisely and accurately increase the quality of the administered items as the test progresses, reducing the likelihood that a high-discrimination item will be administered to an examinee whose ability is not close to the item difficulty. Furthermore, the SDC incorporates a dynamic process for on-the-fly item{\textendash}stratum adjustment to optimize the use of quality items. Simulation studies were conducted to investigate the performance of the SDC in CAT under item exposure control at different levels of severity. According to the results, the SDC can efficiently improve trait estimation in CAT through greater precision and more accurate trait estimation than those generated by other methods (e.g., the maximum Fisher information method) in most conditions.}, doi = {10.1177/0146621619843820}, url = {https://doi.org/10.1177/0146621619843820}, author = {Jyun-Hong Chen and Hsiu-Yi Chao and Shu-Ying Chen} } @article {2731, title = {Framework for Developing Multistage Testing With Intersectional Routing for Short-Length Tests}, journal = {Applied Psychological Measurement}, volume = {44}, number = {2}, year = {2020}, pages = {87-102}, abstract = {Multistage testing (MST) has many practical advantages over typical item-level computerized adaptive testing (CAT), but there is a substantial tradeoff when using MST because of its reduced level of adaptability. In typical MST, the first stage almost always performs as a routing stage in which all test takers see a linear test form. If multiple test sections measure different but moderately or highly correlated traits, then a score estimate for one section might be capable of adaptively selecting item modules for following sections without having to administer routing stages repeatedly for each section. In this article, a new framework for developing MST with intersectional routing (ISR) was proposed and evaluated under several research conditions with different MST structures, section score distributions and relationships, and types of regression models for ISR. The overall findings of the study suggested that MST with ISR approach could improve measurement efficiency and test optimality especially with tests with short lengths.}, doi = {10.1177/0146621619837226}, url = {https://doi.org/10.1177/0146621619837226}, author = {Kyung (Chris) T. 
Han} } @article {2735, title = {Item Calibration Methods With Multiple Subscale Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {57}, number = {1}, year = {2020}, pages = {3-28}, abstract = {Abstract Many large-scale educational surveys have moved from linear form design to multistage testing (MST) design. One advantage of MST is that it can provide more accurate latent trait (θ) estimates using fewer items than required by linear tests. However, MST generates incomplete response data by design; hence, questions remain as to how to calibrate items using the incomplete data from MST design. Further complication arises when there are multiple correlated subscales per test, and when items from different subscales need to be calibrated according to their respective score reporting metric. The current calibration-per-subscale method produced biased item parameters, and there is no available method for resolving the challenge. Deriving from the missing data principle, we showed that when calibrating all items together, Rubin{\textquoteright}s ignorability assumption is satisfied such that the traditional single-group calibration is sufficient. When calibrating items per subscale, we proposed a simple modification to the current calibration-per-subscale method that helps reinstate the missing-at-random assumption and therefore corrects for the estimation bias that is otherwise existent. Three mainstream calibration methods are discussed in the context of MST: the marginal maximum likelihood estimation, the expectation maximization method, and the fixed parameter calibration. An extensive simulation study is conducted and a real data example from NAEP is analyzed to provide convincing empirical evidence.}, keywords = {EM, marginal maximum likelihood, missing data, multistage testing}, doi = {10.1111/jedm.12241}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12241}, author = {Wang, Chun and Chen, Ping and Jiang, Shengyu} } @article {2734, title = {Item Selection and Exposure Control Methods for Computerized Adaptive Testing with Multidimensional Ranking Items}, journal = {Journal of Educational Measurement}, volume = {57}, number = {2}, year = {2020}, pages = {343-369}, abstract = {Abstract The use of computerized adaptive testing algorithms for ranking items (e.g., college preferences, career choices) involves two major challenges: unacceptably high computation times (selecting from a large item pool with many dimensions) and biased results (enhanced preferences or intensified examinee responses because of repeated statements across items). To address these issues, we introduce subpool partition strategies for item selection and within-person statement exposure control procedures. Simulations showed that the multinomial method reduces computation time while maintaining measurement precision. Both the freeze and revised Sympson-Hetter online (RSHO) methods controlled the statement exposure rate; RSHO sacrificed some measurement precision but increased pool use.
Furthermore, preventing a statement{\textquoteright}s repetition on consecutive items neither hindered the effectiveness of the freeze or RSHO method nor reduced measurement precision.}, doi = {10.1111/jedm.12252}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12252}, author = {Chen, Chia-Wen and Wang, Wen-Chung and Chiu, Ming Ming and Ro, Sage} } @article {2729, title = {Multidimensional Test Assembly Using Mixed-Integer Linear Programming: An Application of Kullback{\textendash}Leibler Information}, journal = {Applied Psychological Measurement}, volume = {44}, number = {1}, year = {2020}, pages = {17-32}, abstract = {Many educational testing programs require different test forms with minimal or no item overlap. At the same time, the test forms should be parallel in terms of their statistical and content-related properties. A well-established method to assemble parallel test forms is to apply combinatorial optimization using mixed-integer linear programming (MILP). Using this approach, in the unidimensional case, Fisher information (FI) is commonly used as the statistical target to obtain parallelism. In the multidimensional case, however, FI is a multidimensional matrix, which complicates its use as a statistical target. Previous research addressing this problem focused on item selection criteria for multidimensional computerized adaptive testing (MCAT). Yet these selection criteria are not directly transferable to the assembly of linear parallel test forms. To bridge this gap the authors derive different statistical targets, based on either FI or the Kullback{\textendash}Leibler (KL) divergence, that can be applied in MILP models to assemble multidimensional parallel test forms. Using simulated item pools and an item pool based on empirical items, the proposed statistical targets are compared and evaluated. Promising results with respect to the KL-based statistical targets are presented and discussed.}, doi = {10.1177/0146621619827586}, url = {https://doi.org/10.1177/0146621619827586}, author = {Dries Debeer and Peter W. van Rijn and Usama S. Ali} } @article {2728, title = {New Efficient and Practicable Adaptive Designs for Calibrating Items Online}, journal = {Applied Psychological Measurement}, volume = {44}, number = {1}, year = {2020}, pages = {3-16}, abstract = {When calibrating new items online, it is practicable to first compare all new items according to some criterion and then assign the most suitable one to the current examinee who reaches a seeding location. The modified D-optimal design proposed by van der Linden and Ren (denoted as D-VR design) works within this practicable framework with the aim of directly optimizing the estimation of item parameters. However, the optimal design point for a given new item should be obtained by comparing all examinees in a static examinee pool. Thus, D-VR design still has room for improvement in calibration efficiency from the view of traditional optimal design. To this end, this article incorporates the idea of traditional optimal design into D-VR design and proposes a new online calibration design criterion, namely, excellence degree (ED) criterion. Four different schemes are developed to measure the information provided by the current examinee when implementing this new criterion, and four new ED designs equipped with them are put forward accordingly. Simulation studies were conducted under a variety of conditions to compare the D-VR design and the four proposed ED designs in terms of calibration efficiency. 
Results showed that the four ED designs outperformed D-VR design in almost all simulation conditions.}, doi = {10.1177/0146621618824854}, url = {https://doi.org/10.1177/0146621618824854}, author = {Yinhong He and Ping Chen and Yong Li} } @article {2739, title = {The Optimal Item Pool Design in Multistage Computerized Adaptive Tests With the p-Optimality Method}, journal = {Educational and Psychological Measurement}, volume = {80}, number = {5}, year = {2020}, pages = {955-974}, abstract = {The present study extended the p-optimality method to the multistage computerized adaptive test (MST) context in developing optimal item pools to support different MST panel designs under different test configurations. Using the Rasch model, simulated optimal item pools were generated with and without practical constraints of exposure control. A total number of 72 simulated optimal item pools were generated and evaluated by an overall sample and conditional sample using various statistical measures. Results showed that the optimal item pools built with the p-optimality method provide sufficient measurement accuracy under all simulated MST panel designs. Exposure control affected the item pool size, but not the item distributions and item pool characteristics. This study demonstrated that the p-optimality method can adapt to MST item pool design, facilitate the MST assembly process, and improve its scoring accuracy.}, doi = {10.1177/0013164419901292}, url = {https://doi.org/10.1177/0013164419901292}, author = {Lihong Yang and Mark D. Reckase} } @article {2733, title = {Stratified Item Selection Methods in Cognitive Diagnosis Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {44}, number = {5}, year = {2020}, pages = {346-361}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) aims to obtain more useful diagnostic information by taking advantages of computerized adaptive testing (CAT). Cognitive diagnosis models (CDMs) have been developed to classify examinees into the correct proficiency classes so as to get more efficient remediation, whereas CAT tailors optimal items to the examinee{\textquoteright}s mastery profile. The item selection method is the key factor of the CD-CAT procedure. In recent years, a large number of parametric/nonparametric item selection methods have been proposed. In this article, the authors proposed a series of stratified item selection methods in CD-CAT, which are combined with posterior-weighted Kullback{\textendash}Leibler (PWKL), nonparametric item selection (NPS), and weighted nonparametric item selection (WNPS) methods, and named S-PWKL, S-NPS, and S-WNPS, respectively. Two different types of stratification indices were used: original versus novel. The performances of the proposed item selection methods were evaluated via simulation studies and compared with the PWKL, NPS, and WNPS methods without stratification. Manipulated conditions included calibration sample size, item quality, number of attributes, number of strata, and data generation models. Results indicated that the S-WNPS and S-NPS methods performed similarly, and both outperformed the S-PWKL method. 
Item selection methods with novel stratification indices performed slightly better than those with original stratification indices, and methods without stratification performed the worst.}, doi = {10.1177/0146621619893783}, url = {https://doi.org/10.1177/0146621619893783}, author = {Jing Yang and Hua-Hua Chang and Jian Tao and Ningzhong Shi} } @article {2718, title = {Three Measures of Test Adaptation Based on Optimal Test Information}, journal = {Journal of Computerized Adaptive Testing}, volume = {8}, year = {2020}, pages = {1-19}, issn = {2165-6592}, doi = {10.7333/2002-0801001}, url = {http://iacat.org/jcat/index.php/jcat/article/view/80/37}, author = {G. Gage Kingsbury and Steven L. Wise} } @article {2694, title = {Adaptive Testing With a Hierarchical Item Response Theory Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {1}, year = {2019}, pages = {51-67}, abstract = {The hierarchical item response theory (H-IRT) model is very flexible and allows a general factor and subfactors within an overall structure of two or more levels. When an H-IRT model with a large number of dimensions is used for an adaptive test, the computational burden associated with interim scoring and selection of subsequent items is heavy. An alternative approach for any high-dimension adaptive test is to reduce dimensionality for interim scoring and item selection and then revert to full dimensionality for final score reporting, thereby significantly reducing the computational burden. This study compared the accuracy and efficiency of final scoring for multidimensional, local multidimensional, and unidimensional item selection and interim scoring methods, using both simulated and real item pools. The simulation study was conducted under 10 conditions (i.e., five test lengths and two H-IRT models) with a simulated sample of 10,000 students. The study with the real item pool was conducted using item parameters from an actual 45-item adaptive test with a simulated sample of 10,000 students. Results indicate that the theta estimations provided by the local multidimensional and unidimensional item selection and interim scoring methods were nearly as accurate as the theta estimation provided by the multidimensional item selection and interim scoring method, especially during the real item pool study. In addition, the multidimensional method required the longest computation time and the unidimensional method required the shortest computation time.}, doi = {10.1177/0146621618765714}, url = {https://doi.org/10.1177/0146621618765714}, author = {Wenhao Wang and Neal Kingston} } @article {2724, title = {Application of Dimension Reduction to CAT Item Selection Under the Bifactor Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {6}, year = {2019}, pages = {419-434}, abstract = {Multidimensional computerized adaptive testing (MCAT) based on the bifactor model is suitable for tests with multidimensional bifactor measurement structures.
Several item selection methods that proved to be more advantageous than the maximum Fisher information method are not practical for bifactor MCAT due to time-consuming computations resulting from high dimensionality. To make them applicable in bifactor MCAT, dimension reduction is applied to four item selection methods, which are the posterior-weighted Fisher D-optimality (PDO) and three non-Fisher information-based methods{\textemdash}posterior expected Kullback{\textendash}Leibler information (PKL), continuous entropy (CE), and mutual information (MI). They were compared with the Bayesian D-optimality (BDO) method in terms of estimation precision. When both the general and group factors are the measurement objectives, BDO, PDO, CE, and MI perform equally well and better than PKL. When the group factors represent nuisance dimensions, MI and CE perform the best in estimating the general factor, followed by the BDO, PDO, and PKL. How the bifactor pattern and test length affect estimation accuracy was also discussed.}, doi = {10.1177/0146621618813086}, url = {https://doi.org/10.1177/0146621618813086}, author = {Xiuzhen Mao and Jiahui Zhang and Tao Xin} } @article {2723, title = {Computerized Adaptive Testing for Cognitively Based Multiple-Choice Data}, journal = {Applied Psychological Measurement}, volume = {43}, number = {5}, year = {2019}, pages = {388-401}, abstract = {Cognitive diagnosis models (CDMs) are latent class models that hold great promise for providing diagnostic information about student knowledge profiles. The increasing use of computers in classrooms enhances the advantages of CDMs for more efficient diagnostic testing by using adaptive algorithms, referred to as cognitive diagnosis computerized adaptive testing (CD-CAT). When multiple-choice items are involved, CD-CAT can be further improved by using polytomous scoring (i.e., considering the specific options students choose), instead of dichotomous scoring (i.e., marking answers as either right or wrong). In this study, the authors propose and evaluate the performance of the Jensen{\textendash}Shannon divergence (JSD) index as an item selection method for the multiple-choice deterministic inputs, noisy {\textquotedblleft}and{\textquotedblright} gate (MC-DINA) model. Attribute classification accuracy and item usage are evaluated under different conditions of item quality and test termination rule. The proposed approach is compared with the random selection method and an approximate approach based on dichotomized responses. The results show that under the MC-DINA model, JSD improves the attribute classification accuracy significantly by considering the information from distractors, even with a very short test length. This result has important implications in practical classroom settings as it can allow for dramatically reduced testing times, thus resulting in more targeted learning opportunities.}, doi = {10.1177/0146621618798665}, url = {https://doi.org/10.1177/0146621618798665}, author = {Hulya D. Yigit and Miguel A. Sorrel and Jimmy de la Torre} } @article {2736, title = {Computerized Adaptive Testing in Early Education: Exploring the Impact of Item Position Effects on Ability Estimation}, journal = {Journal of Educational Measurement}, volume = {56}, number = {2}, year = {2019}, pages = {437-451}, abstract = {Abstract Studies have shown that item difficulty can vary significantly based on the context of an item within a test form. 
In particular, item position may be associated with practice and fatigue effects that influence item parameter estimation. The purpose of this research was to examine the relevance of item position specifically for assessments used in early education, an area of testing that has received relatively limited psychometric attention. In an initial study, multilevel item response models fit to data from an early literacy measure revealed statistically significant increases in difficulty for items appearing later in a 20-item form. The estimated linear change in logits for an increase of 1 in position was .024, resulting in a predicted change of .46 logits for a shift from the beginning to the end of the form. A subsequent simulation study examined impacts of item position effects on person ability estimation within computerized adaptive testing. Implications and recommendations for practice are~discussed.}, doi = {10.1111/jedm.12215}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12215}, author = {Albano, Anthony D. and Cai, Liuhan and Lease, Erin M. and McConnell, Scott R.} } @article {2740, title = {Developing Multistage Tests Using D-Scoring Method}, journal = {Educational and Psychological Measurement}, volume = {79}, number = {5}, year = {2019}, pages = {988-1008}, abstract = {The D-scoring method for scoring and equating tests with binary items proposed by Dimitrov offers some of the advantages of item response theory, such as item-level difficulty information and score computation that reflects the item difficulties, while retaining the merits of classical test theory such as the simplicity of number correct score computation and relaxed requirements for model sample sizes. Because of its unique combination of those merits, the D-scoring method has seen quick adoption in the educational and psychological measurement field. Because item-level difficulty information is available with the D-scoring method and item difficulties are reflected in test scores, it conceptually makes sense to use the D-scoring method with adaptive test designs such as multistage testing (MST). In this study, we developed and compared several versions of the MST mechanism using the D-scoring approach and also proposed and implemented a new framework for conducting MST simulation under the D-scoring method. Our findings suggest that the score recovery performance under MST with D-scoring was promising, as it retained score comparability across different MST paths. We found that MST using the D-scoring method can achieve improvements in measurement precision and efficiency over linear-based tests that use D-scoring method.}, doi = {10.1177/0013164419841428}, url = {https://doi.org/10.1177/0013164419841428}, author = {Kyung (Chris) T. Han and Dimiter M. Dimitrov and Faisal Al-Mashary} } @article {2737, title = {Efficiency of Targeted Multistage Calibration Designs Under Practical Constraints: A Simulation Study}, journal = {Journal of Educational Measurement}, volume = {56}, number = {1}, year = {2019}, pages = {121-146}, abstract = {Abstract Calibration of an item bank for computer adaptive testing requires substantial resources. In this study, we investigated whether the efficiency of calibration under the Rasch model could be enhanced by improving the match between item difficulty and student ability. We introduced targeted multistage calibration designs, a design type that considers ability-related background variables and performance for assigning students to suitable items. 
Furthermore, we investigated whether uncertainty about item difficulty could impair the assembling of efficient designs. The results indicated that targeted multistage calibration designs were more efficient than ordinary targeted designs under optimal conditions. Limited knowledge about item difficulty reduced the efficiency of one of the two investigated targeted multistage calibration designs, whereas targeted designs were more robust.}, doi = {10.1111/jedm.12203}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12203}, author = {Berger, St{\'e}phanie and Verschoor, Angela J. and Eggen, Theo J. H. M. and Moser, Urs} } @article {2702, title = {How Adaptive Is an Adaptive Test: Are All Adaptive Tests Adaptive?}, journal = {Journal of Computerized Adaptive Testing}, volume = {7}, year = {2019}, pages = {1-14}, keywords = {computerized adaptive test, multistage test, statistical indicators of amount of adaptation}, doi = {10.7333/1902-0701001}, url = {http://iacat.org/jcat/index.php/jcat/article/view/69/34}, author = {Mark Reckase and Unhee Ju and Sewon Kim} } @article {2741, title = {Imputation Methods to Deal With Missing Responses in Computerized Adaptive Multistage Testing}, journal = {Educational and Psychological Measurement}, volume = {79}, number = {3}, year = {2019}, pages = {495-511}, abstract = {Routing examinees to modules based on their ability level is a very important aspect in computerized adaptive multistage testing. However, the presence of missing responses may complicate estimation of examinee ability, which may result in misrouting of individuals. Therefore, missing responses should be handled carefully. This study investigated multiple missing data methods in computerized adaptive multistage testing, including two imputation techniques, the use of full information maximum likelihood and the use of scoring missing data as incorrect. These methods were examined under the missing completely at random, missing at random, and missing not at random frameworks, as well as other testing conditions. Comparisons were made to baseline conditions where no missing data were present. The results showed that imputation and the full information maximum likelihood methods outperformed incorrect scoring methods in terms of average bias, average root mean square error, and correlation between estimated and true thetas.}, doi = {10.1177/0013164418805532}, url = {https://doi.org/10.1177/0013164418805532}, author = {Dee Duygu Cetin-Berber and Halil Ibrahim Sari and Anne Corinne Huggins-Manley} } @article {2727, title = {An Investigation of Exposure Control Methods With Variable-Length CAT Using the Partial Credit Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {8}, year = {2019}, pages = {624-638}, abstract = {The purpose of this simulation study was to investigate the effect of several different item exposure control procedures in computerized adaptive testing (CAT) with variable-length stopping rules using the partial credit model. Previous simulation studies on CAT exposure control methods with polytomous items rarely considered variable-length tests. The four exposure control techniques examined were the randomesque with a group of three items, randomesque with a group of six items, progressive-restricted standard error (PR-SE), and no exposure control. The two variable-length stopping rules included were the SE and predicted standard error reduction (PSER), along with three item pools of varied sizes (43, 86, and 172 items). 
Descriptive statistics on number of nonconvergent cases, measurement precision, testing burden, item overlap, item exposure, and pool utilization were calculated. Results revealed that the PSER stopping rule administered fewer items on average while maintaining measurement precision similar to the SE stopping rule across the different item pool sizes and exposure controls. The PR-SE exposure control procedure surpassed the randomesque methods by further reducing test overlap, maintaining maximum exposure rates at the target rate or lower, and utilizing all items from the pool with a minimal increase in number of items administered and nonconvergent cases.}, doi = {10.1177/0146621618824856}, url = {https://doi.org/10.1177/0146621618824856}, author = {Audrey J. Leroux and J. Kay Waid-Ebbs and Pey-Shan Wen and Drew A. Helmer and David P. Graham and Maureen K. O{\textquoteright}Connor and Kathleen Ray} } @article {2742, title = {Item Selection Criteria With Practical Constraints in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {79}, number = {2}, year = {2019}, pages = {335-357}, abstract = {For item selection in cognitive diagnostic computerized adaptive testing (CD-CAT), ideally, a single item selection index should be created to simultaneously regulate precision, exposure status, and attribute balancing. For this purpose, in this study, we first proposed an attribute-balanced item selection criterion, namely, the standardized weighted deviation global discrimination index (SWDGDI), and subsequently formulated the constrained progressive index (CP\_SWDGDI) by casting the SWDGDI in a progressive algorithm. A simulation study revealed that the SWDGDI method was effective in balancing attribute coverage and the CP\_SWDGDI method was able to simultaneously balance attribute coverage and item pool usage while maintaining acceptable estimation precision. This research also demonstrates the advantage of a relatively low number of attributes in CD-CAT applications.}, doi = {10.1177/0013164418790634}, url = {https://doi.org/10.1177/0013164418790634}, author = {Chuan-Ju Lin and Hua-Hua Chang} } @article {2695, title = {Measurement Efficiency for Fixed-Precision Multidimensional Computerized Adaptive Tests: Comparing Health Measurement and Educational Testing Using Example Banks}, journal = {Applied Psychological Measurement}, volume = {43}, number = {1}, year = {2019}, pages = {68-83}, abstract = {It is currently not entirely clear to what degree the research on multidimensional computerized adaptive testing (CAT) conducted in the field of educational testing can be generalized to fields such as health assessment, where CAT design factors differ considerably from those typically used in educational testing. In this study, the impact of a number of important design factors on CAT performance is systematically evaluated, using realistic example item banks for two main scenarios: health assessment (polytomous items, small to medium item bank sizes, high discrimination parameters) and educational testing (dichotomous items, large item banks, small- to medium-sized discrimination parameters). Measurement efficiency is evaluated for both between-item multidimensional CATs and separate unidimensional CATs for each latent dimension. In this study, we focus on fixed-precision (variable-length) CATs because it is both feasible and desirable in health settings, but so far most research regarding CAT has focused on fixed-length testing. 
This study shows that the benefits associated with fixed-precision multidimensional CAT hold under a wide variety of circumstances.}, doi = {10.1177/0146621618765719}, url = {https://doi.org/10.1177/0146621618765719}, author = {Muirne C. S. Paap and Sebastian Born and Johan Braeken} } @article {2725, title = {Multidimensional Computerized Adaptive Testing Using Non-Compensatory Item Response Theory Models}, journal = {Applied Psychological Measurement}, volume = {43}, number = {6}, year = {2019}, pages = {464-480}, abstract = {Current use of multidimensional computerized adaptive testing (MCAT) has been developed in conjunction with compensatory multidimensional item response theory (MIRT) models rather than with non-compensatory ones. In recognition of the usefulness of MCAT and the complications associated with non-compensatory data, this study aimed to develop MCAT algorithms using non-compensatory MIRT models and to evaluate their performance. For the purpose of the study, three item selection methods were adapted and compared, namely, the Fisher information method, the mutual information method, and the Kullback{\textendash}Leibler information method. The results of a series of simulations showed that the Fisher information and mutual information methods performed similarly, and both outperformed the Kullback{\textendash}Leibler information method. In addition, it was found that the more stringent the termination criterion and the higher the correlation between the latent traits, the higher the resulting measurement precision and test reliability. Test reliability was very similar across the dimensions, regardless of the correlation between the latent traits and termination criterion. On average, the difficulties of the administered items were found to be at a lower level than the examinees{\textquoteright} abilities, which shed light on item bank construction for non-compensatory items.}, doi = {10.1177/0146621618800280}, url = {https://doi.org/10.1177/0146621618800280}, author = {Chia-Ling Hsu and Wen-Chung Wang} } @article {2726, title = {Nonparametric CAT for CD in Educational Settings With Small Samples}, journal = {Applied Psychological Measurement}, volume = {43}, number = {7}, year = {2019}, pages = {543-561}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) has been suggested by researchers as a diagnostic tool for assessment and evaluation. Although model-based CD-CAT is relatively well researched in the context of large-scale assessment systems, this type of system has not received the same degree of research and development in small-scale settings, such as at the course-based level, where this system would be the most useful. The main obstacle is that the statistical estimation techniques that are successfully applied within the context of a large-scale assessment require large samples to guarantee reliable calibration of the item parameters and an accurate estimation of the examinees{\textquoteright} proficiency class membership. Such samples are simply not obtainable in course-based settings. Therefore, the nonparametric item selection (NPS) method that does not require any parameter calibration, and thus, can be used in small educational programs is proposed in the study. 
The proposed nonparametric CD-CAT uses the nonparametric classification (NPC) method to estimate an examinee{\textquoteright}s attribute profile based on the examinee{\textquoteright}s item responses, and the item that can best discriminate between the estimated attribute profile and the other attribute profiles is then selected. The simulation results show that the NPS method outperformed the compared parametric CD-CAT algorithms and that the differences were substantial when the calibration samples were small.}, doi = {10.1177/0146621618813113}, url = {https://doi.org/10.1177/0146621618813113}, author = {Yuan-Pei Chang and Chia-Yi Chiu and Rung-Ching Tsai} } @article {2738, title = {Routing Strategies and Optimizing Design for Multistage Testing in International Large-Scale Assessments}, journal = {Journal of Educational Measurement}, volume = {56}, number = {1}, year = {2019}, pages = {192-213}, abstract = {Abstract This study investigates the effect of several design and administration choices on item exposure and person/item parameter recovery under a multistage test (MST) design. In a simulation study, we examine whether number-correct (NC) or item response theory (IRT) methods are differentially effective at routing students to the correct next stage(s) and whether routing choices (optimal versus suboptimal routing) have an impact on achievement precision. Additionally, we examine the impact of testlet length on both person and item recovery. Overall, our results suggest that no single approach works best across the studied conditions. With respect to the mean person parameter recovery, IRT scoring (via either Fisher information or preliminary EAP estimates) outperformed classical NC methods, although differences in bias and root mean squared error were generally small. Item exposure rates were found to be more evenly distributed when suboptimal routing methods were used, and item recovery (both difficulty and discrimination) was most precisely observed for items with moderate difficulties. Based on the results of the simulation study, we draw conclusions and discuss implications for practice in the context of international large-scale assessments that recently introduced adaptive assessment in the form of MST. Future research directions are also discussed.}, doi = {10.1111/jedm.12206}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12206}, author = {Svetina, Dubravka and Liaw, Yuan-Ling and Rutkowski, Leslie and Rutkowski, David} } @article {2717, title = {Time-Efficient Adaptive Measurement of Change}, journal = {Journal of Computerized Adaptive Testing}, volume = {7}, year = {2019}, pages = {15-34}, abstract = {

The adaptive measurement of change (AMC) refers to the use of computerized adaptive testing (CAT) at multiple occasions to efficiently assess a respondent\’s improvement, decline, or sameness from occasion to occasion. Whereas previous AMC research focused on administering the most informative item to a respondent at each stage of testing, the current research proposes the use of Fisher information per time unit as an item selection procedure for AMC. The latter procedure incorporates not only the amount of information provided by a given item but also the expected amount of time required to complete it. In a simulation study, the use of Fisher information per time unit item selection resulted in a lower false positive rate in the majority of conditions studied, and a higher true positive rate in all conditions studied, compared to item selection via Fisher information without accounting for the expected time taken. Future directions of research are suggested.

}, keywords = {adaptive measurement of change, computerized adaptive testing, Fisher information, item selection, response-time modeling}, issn = {2165-6592}, doi = {10.7333/1909-0702015}, url = {http://iacat.org/jcat/index.php/jcat/article/view/73/35}, author = {Matthew Finkelman and Chun Wang} } @article {2673, title = {Adaptive Item Selection Under Matroid Constraints}, journal = {Journal of Computerized Adaptive Testing}, volume = {6}, year = {2018}, pages = {15-36}, doi = {10.7333/1808-0602015}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/64/32}, author = {Daniel Bengs and Ulf Brefeld and Ulf Kr{\"o}hne} } @article {2697, title = {A Comparison of Constraint Programming and Mixed-Integer Programming for Automated Test-Form Generation}, journal = {Journal of Educational Measurement}, volume = {55}, number = {4}, year = {2018}, pages = {435-456}, abstract = {Abstract The final step of the typical process of developing educational and psychological tests is to place the selected test items in a formatted form. The step involves the grouping and ordering of the items to meet a variety of formatting constraints. As this activity tends to be time-intensive, the use of mixed-integer programming (MIP) has been proposed to automate it. The goal of this article is to show how constraint programming (CP) can be used as an alternative to automate test-form generation problems with a large variety of formatting constraints, and how it compares with MIP-based form generation as for its models, solutions, and running times. Two empirical examples are presented: (i) automated generation of a computerized fixed-form; and (ii) automated generation of shadow tests for multistage testing. Both examples show that CP works well with feasible solutions and running times likely to be better than that for MIP-based applications.}, doi = {10.1111/jedm.12187}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12187}, author = {Li, Jie and van der Linden, Wim J.} } @article {2691, title = {Constructing Shadow Tests in Variable-Length Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {42}, number = {7}, year = {2018}, pages = {538-552}, abstract = {Imposing content constraints is very important in most operational computerized adaptive testing (CAT) programs in educational measurement. Shadow test approach to CAT (Shadow CAT) offers an elegant solution to imposing statistical and nonstatistical constraints by projecting future consequences of item selection. The original form of Shadow CAT presumes fixed test lengths. The goal of the current study was to extend Shadow CAT to tests under variable-length termination conditions and evaluate its performance relative to other content balancing approaches. The study demonstrated the feasibility of constructing Shadow CAT with variable test lengths and in operational CAT programs. 
The results indicated the superiority of the approach compared with other content balancing methods.}, doi = {10.1177/0146621617753736}, url = {https://doi.org/10.1177/0146621617753736}, author = {Qi Diao and Hao Ren} } @article {2690, title = {A Continuous a-Stratification Index for Item Exposure Control in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {42}, number = {7}, year = {2018}, pages = {523-537}, abstract = {The method of a-stratification aims to reduce item overexposure in computerized adaptive testing, as items that are administered at very high rates may threaten the validity of test scores. In existing methods of a-stratification, the item bank is partitioned into a fixed number of nonoverlapping strata according to the items{\textquoteright}a, or discrimination, parameters. This article introduces a continuous a-stratification index which incorporates exposure control into the item selection index itself and thus eliminates the need for fixed discrete strata. The new continuous a-stratification index is compared with existing stratification methods via simulation studies in terms of ability estimation bias, mean squared error, and control of item exposure rates.}, doi = {10.1177/0146621618758289}, url = {https://doi.org/10.1177/0146621618758289}, author = {Alan Huebner and Chun Wang and Bridget Daly and Colleen Pinkelman} } @article {2698, title = {Evaluation of a New Method for Providing Full Review Opportunities in Computerized Adaptive Testing{\textemdash}Computerized Adaptive Testing With Salt}, journal = {Journal of Educational Measurement}, volume = {55}, number = {4}, year = {2018}, pages = {582-594}, abstract = {Abstract Allowing item review in computerized adaptive testing (CAT) is getting more attention in the educational measurement field as more and more testing programs adopt CAT. The research literature has shown that allowing item review in an educational test could result in more accurate estimates of examinees{\textquoteright} abilities. The practice of item review in CAT, however, is hindered by the potential danger of test-manipulation strategies. To provide review opportunities to examinees while minimizing the effect of test-manipulation strategies, researchers have proposed different algorithms to implement CAT with restricted revision options. In this article, we propose and evaluate a new method that implements CAT without any restriction on item review. In particular, we evaluate the new method in terms of the accuracy on ability estimates and the robustness against test-manipulation strategies. This study shows that the newly proposed method is promising in a win-win situation: examinees have full freedom to review and change answers, and the impacts of test-manipulation strategies are undermined.}, doi = {10.1111/jedm.12193}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12193}, author = {Cui, Zhongmin and Liu, Chunyan and He, Yong and Chen, Hanwei} } @article {2604, title = {Factors Affecting the Classification Accuracy and Average Length of a Variable-Length Cognitive Diagnostic Computerized Test}, journal = {Journal of Computerized Adaptive Testing}, volume = {6}, year = {2018}, pages = {1-14}, doi = {10.7333/1802-060101}, url = {http://iacat.org/jcat/index.php/jcat/article/view/55/30}, author = {Huebner, Alan and Finkelman, Matthew D. 
and Weissman, Alexander} } @article {2687, title = {From Simulation to Implementation: Two CAT Case Studies}, journal = {Practical Assessment, Research \& Evaluation }, volume = {23}, year = {2018}, url = {http://pareonline.net/getvn.asp?v=23\&n=14}, author = {John J Barnard} } @article {2692, title = {A Hybrid Strategy to Construct Multistage Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {42}, number = {8}, year = {2018}, pages = {630-643}, abstract = {How to effectively construct multistage adaptive test (MST) panels is a topic that has spurred recent advances. The most commonly used approaches for MST assembly use one of two strategies: bottom-up and top-down. The bottom-up approach splits the whole test into several modules, and each module is built first, then all modules are compiled to obtain the whole test, while the top-down approach follows the opposite direction. Both methods have their pros and cons, and sometimes neither is convenient for practitioners. This study provides an innovative hybrid strategy to build optimal MST panels efficiently most of the time. Empirical data and results by using this strategy will be provided.}, doi = {10.1177/0146621618762739}, url = {https://doi.org/10.1177/0146621618762739}, author = {Xinhui Xiong} } @article {2685, title = {Implementing Three CATs Within Eighteen Months}, journal = {Journal of Computerized Adaptive Testing}, volume = {6}, year = {2018}, month = {09/2018}, pages = {38-55}, doi = {10.7333/1809-060338}, url = {http://iacat.org/jcat/index.php/jcat/article/view/70/33}, author = {Christian Spoden and Andreas Frey and Raphael Bernhardt} } @article {2626, title = {Item Selection Criteria With Practical Constraints in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, year = {2018}, pages = {0013164418790634}, abstract = {For item selection in cognitive diagnostic computerized adaptive testing (CD-CAT), ideally, a single item selection index should be created to simultaneously regulate precision, exposure status, and attribute balancing. For this purpose, in this study, we first proposed an attribute-balanced item selection criterion, namely, the standardized weighted deviation global discrimination index (SWDGDI), and subsequently formulated the constrained progressive index (CP\_SWDGDI) by casting the SWDGDI in a progressive algorithm. A simulation study revealed that the SWDGDI method was effective in balancing attribute coverage and the CP\_SWDGDI method was able to simultaneously balance attribute coverage and item pool usage while maintaining acceptable estimation precision. This research also demonstrates the advantage of a relatively low number of attributes in CD-CAT applications.}, doi = {10.1177/0013164418790634}, url = {https://doi.org/10.1177/0013164418790634}, author = {Chuan-Ju Lin and Hua-Hua Chang} } @article {2693, title = {Item Selection Methods in Multidimensional Computerized Adaptive Testing With Polytomously Scored Items}, journal = {Applied Psychological Measurement}, volume = {42}, number = {8}, year = {2018}, pages = {677-694}, abstract = {Multidimensional computerized adaptive testing (MCAT) has been developed over the past decades, and most of them can only deal with dichotomously scored items. However, polytomously scored items have been broadly used in a variety of tests for their advantages of providing more information and testing complicated abilities and skills. 
The purpose of this study is to discuss the item selection algorithms used in MCAT with polytomously scored items (PMCAT). Several promising item selection algorithms used in MCAT are extended to PMCAT, and two new item selection methods are proposed to improve the existing selection strategies. Two simulation studies are conducted to demonstrate the feasibility of the extended and proposed methods. The simulation results show that most of the extended item selection methods for PMCAT are feasible and the newly proposed item selection methods perform well. Taking the security of the pool into account, when two dimensions are considered (Study 1), the proposed modified continuous entropy method (MCEM) is the best overall in that it yields the lowest item exposure rate and relatively high accuracy. As for high dimensions (Study 2), results show that mutual information (MUI) and MCEM maintain relatively high estimation accuracy, and the item exposure rates decrease as the correlation increases.}, doi = {10.1177/0146621618762748}, url = {https://doi.org/10.1177/0146621618762748}, author = {Dongbo Tu and Yuting Han and Yan Cai and Xuliang Gao} } @article {2624, title = {Latent Class Analysis of Recurrent Events in Problem-Solving Items}, journal = {Applied Psychological Measurement}, volume = {42}, number = {6}, year = {2018}, pages = {478-498}, abstract = {Computer-based assessment of complex problem-solving abilities is becoming more and more popular. In such an assessment, the entire problem-solving process of an examinee is recorded, providing detailed information about the individual, such as behavioral patterns, speed, and learning trajectory. The problem-solving processes are recorded in a computer log file, which is a time-stamped documentation of events related to task completion. As opposed to cross-sectional response data from traditional tests, process data in log files are massive and irregularly structured, calling for effective exploratory data analysis methods. Motivated by a specific complex problem-solving item {\textquotedblleft}Climate Control{\textquotedblright} in the 2012 Programme for International Student Assessment, the authors propose a latent class analysis approach to analyzing the events that occurred in the problem-solving processes. The exploratory latent class analysis yields meaningful latent classes. Simulation studies are conducted to evaluate the proposed approach.}, doi = {10.1177/0146621617748325}, url = {https://doi.org/10.1177/0146621617748325}, author = {Haochen Xu and Guanhua Fang and Yunxiao Chen and Jingchen Liu and Zhiliang Ying} } @article {2612, title = {Measuring patient-reported outcomes adaptively: Multidimensionality matters!}, journal = {Applied Psychological Measurement}, year = {2018}, doi = {10.1177/0146621617733954}, author = {Paap, Muirne C. S. and Kroeze, Karel A. and Glas, C. A. W. and Terwee, C. B. and van der Palen, Job and Veldkamp, Bernard P.} } @article {2699, title = {On-the-Fly Constraint-Controlled Assembly Methods for Multistage Adaptive Testing for Cognitive Diagnosis}, journal = {Journal of Educational Measurement}, volume = {55}, number = {4}, year = {2018}, pages = {595-613}, abstract = {Abstract This study applied the mode of on-the-fly assembled multistage adaptive testing to cognitive diagnosis (CD-OMST). Several module assembly methods for CD-OMST were proposed and compared in terms of measurement precision, test security, and constraint management.
The module assembly methods in the study included the maximum priority index method (MPI), the revised maximum priority index (RMPI), the weighted deviation model (WDM), and the two revised Monte Carlo methods (R1-MC, R2-MC). Simulation results showed that on the whole the CD-OMST performs well in that it not only has acceptable attribute pattern correct classification rates but also satisfies both statistical and nonstatistical constraints; the RMPI method was generally better than the MPI method, the R2-MC method was generally better than the R1-MC method, and the two revised Monte Carlo methods performed best in terms of test security and constraint management, whereas the RMPI and WDM methods worked best in terms of measurement precision. The study is not only expected to provide information about how to combine MST and CD using an on-the-fly method and how these assembly methods in CD-OMST perform relative to each other, but also to offer guidance for practitioners to assemble modules in CD-OMST with both statistical and nonstatistical constraints.}, doi = {10.1111/jedm.12194}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12194}, author = {Liu, Shuchang and Cai, Yan and Tu, Dongbo} } @article {2610, title = {Some recommendations for developing multidimensional computerized adaptive tests for patient-reported outcomes}, journal = {Quality of Life Research}, volume = {27}, number = {4}, year = {2018}, month = {Apr}, pages = {1055{\textendash}1063}, abstract = {Multidimensional item response theory and computerized adaptive testing (CAT) are increasingly used in mental health, quality of life (QoL), and patient-reported outcome measurement. Although multidimensional assessment techniques hold promise, they are more challenging in their application than unidimensional ones. The authors comment on minimal standards when developing multidimensional CATs.}, issn = {1573-2649}, doi = {10.1007/s11136-018-1821-8}, url = {https://doi.org/10.1007/s11136-018-1821-8}, author = {Smits, Niels and Paap, Muirne C. S. and B{\"o}hnke, Jan R.} } @article {2696, title = {A Top-Down Approach to Designing the Computerized Adaptive Multistage Test}, journal = {Journal of Educational Measurement}, volume = {55}, number = {2}, year = {2018}, pages = {243-263}, abstract = {The top-down approach to designing a multistage test is relatively understudied in the literature and underused in research and practice. This study introduced a route-based top-down design approach that directly sets design parameters at the test level and utilizes an advanced automated test assembly algorithm seeking global optimality. The design process in this approach consists of five sub-processes: (1) route mapping, (2) setting objectives, (3) setting constraints, (4) routing error control, and (5) test assembly. Results from a simulation study confirmed that the assembly, measurement and routing results of the top-down design eclipsed those of the bottom-up design. Additionally, the top-down design approach provided unique insights into design decisions that could be used to refine the test.
Notwithstanding these advantages, it is recommended that the top-down and bottom-up approaches be applied in a complementary manner in practice.}, doi = {10.1111/jedm.12174}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jedm.12174}, author = {Luo, Xiao and Kim, Doyoung} } @article {2603, title = {Using Automatic Item Generation to Create Solutions and Rationales for Computerized Formative Testing}, journal = {Applied Psychological Measurement}, volume = {42}, number = {1}, year = {2018}, pages = {42-57}, abstract = {Computerized testing provides many benefits to support formative assessment. However, the advent of computerized formative testing has also raised formidable new challenges, particularly in the area of item development. Large numbers of diverse, high-quality test items are required because items are continuously administered to students. Hence, hundreds of items are needed to develop the banks necessary for computerized formative testing. One promising approach that may be used to address this test development challenge is automatic item generation. Automatic item generation is a relatively new but rapidly evolving research area where cognitive and psychometric modeling practices are used to produce items with the aid of computer technology. The purpose of this study is to describe a new method for generating both the items and the rationales required to solve the items to produce the required feedback for computerized formative testing. The method for rationale generation is demonstrated and evaluated in the medical education domain.}, doi = {10.1177/0146621617726788}, url = {https://doi.org/10.1177/0146621617726788}, author = {Mark J. Gierl and Hollis Lai} } @article {2625, title = {What Information Works Best?: A Comparison of Routing Methods}, journal = {Applied Psychological Measurement}, volume = {42}, number = {6}, year = {2018}, pages = {499-515}, abstract = {There are many item selection methods proposed for computerized adaptive testing (CAT) applications. However, not all of them have been used in computerized multistage testing (ca-MST). This study uses some item selection methods as a routing method in the ca-MST framework. These are maximum Fisher information (MFI), maximum likelihood weighted information (MLWI), maximum posterior weighted information (MPWI), Kullback{\textendash}Leibler (KL), and posterior Kullback{\textendash}Leibler (KLP). The main purpose of this study is to examine the performance of these methods when they are used as a routing method in ca-MST applications. These five information methods under four ca-MST panel designs and two test lengths (30 items and 60 items) were tested using the parameters of a real item bank. Results were evaluated with overall findings (mean bias, root mean square error, correlation between true and estimated thetas, and module exposure rates) and conditional findings (conditional absolute bias, standard error of measurement, and root mean square error). It was found that test length affected the outcomes much more than other study conditions. Under 30-item conditions, 1-3 designs outperformed other panel designs. Under 60-item conditions, 1-3-3 designs were better than other panel designs. Each routing method performed well under particular conditions; there was no clear best method in the studied conditions. 
Recommendations for routing methods under particular conditions are provided for researchers and practitioners, along with the limitations of these results.}, doi = {10.1177/0146621617752990}, url = {https://doi.org/10.1177/0146621617752990}, author = {Halil Ibrahim Sari and Anthony Raborn} } @conference {2667, title = {Adapting Linear Models for Optimal Test Design to More Complex Test Specifications}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Combinatorial optimization (CO) has proven to be a very helpful approach for addressing test assembly issues and for providing solutions. Furthermore, CO has been applied to several test designs, including: (1) the development of linear test forms; (2) computerized adaptive testing; and (3) multistage testing. In his seminal work, van der Linden (2006) laid out the basis for using linear models for simultaneously assembling exams and item pools in a variety of conditions: (1) for single tests and multiple tests; (2) with item sets, etc. However, for some testing programs, the number and complexity of test specifications can grow rapidly. Consequently, the mathematical representation of the test assembly problem goes beyond most approaches reported either in van der Linden\’s book or in the majority of other publications related to test assembly. In this presentation, we extend van der Linden\’s framework by including the concept of blocks for test specifications. We modify the usual mathematical notation of a test assembly problem by including this concept and we show how it can be applied to various test designs. Finally, we will demonstrate an implementation of this approach in stand-alone software called ATASolver.
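For orientation, the kind of linear model being extended here can be written, in its simplest single-form version, roughly as follows (a generic van der Linden-style sketch, not the block-based formulation presented in the talk):

\begin{align*}
\text{maximize} \quad & \sum_{i=1}^{I} I_i(\theta_0)\, x_i \\
\text{subject to} \quad & \sum_{i=1}^{I} x_i = n, \\
& \sum_{i \in V_c} x_i \le n_c \quad \text{for each content area } c, \\
& x_i \in \{0, 1\},
\end{align*}

where x_i indicates whether item i is selected, I_i(\theta_0) is its Fisher information at a target ability \theta_0, and V_c collects the items belonging to content area c. The block concept introduced in the presentation adds further structure on top of constraint sets of this type.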

Session Video

}, keywords = {Complex Test Specifications, Linear Models, Optimal Test Design}, author = {Maxim Morin} } @conference {2651, title = {Adaptive Item and Feedback Selection in Personalized Learning with a Network Approach}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Personalized learning is a term used to describe educational systems that adapt curriculum sequencing, pacing, and presentation to each student based on the student\’s unique background, knowledge, preferences, interests, and learning goals (Chen, 2008; Netcoh, 2016). The technological approach to personalized learning provides data-driven models to incorporate these adaptations automatically. Examples of applications include online learning systems, educational games, and revision-aid systems. In this study we introduce Bayesian networks as a methodology for implementing an adaptive framework within a personalized learning environment. Existing ideas from Computerized Adaptive Testing (CAT) with Item Response Theory (IRT), where choices about content provision are based on maximizing information, are related to the goals of personalized learning environments. Personalized learning entails other goals besides efficient ability estimation by maximizing information, such as an adaptive configuration of preferences and feedback to the student. These considerations will be discussed and their application in networks will be illustrated.

Adaptivity in Personalized Learning. In standard CATs the focus is on selecting items that provide maximum information about the ability of an individual at a certain point in time (Van der Linden \& Glas, 2000). When learning is the main goal of testing, alternative adaptive item selection methods were explored by Eggen (2012). The adaptive choices made in personalized learning applications require additional adaptivity with respect to the following aspects: the moment of feedback, the kind of feedback, and the possibility for students to actively influence the learning process.

Bayesian Networks and Personalized Learning. Personalized learning aims at constructing a framework that incorporates all the aspects mentioned above. Therefore, the goal of this framework is not only to retrieve ability estimates by choosing items on maximum information, but also to allow these other factors to play a role. Plajner and Vomlel (2016) have already applied Bayesian networks to adaptive testing, selecting items with the help of entropy reduction. Almond et al. (2015) provide a reference work on Bayesian networks in educational assessment. Both acknowledge the potential of the method in terms of features such as modularity options to build finer-grained models. IRT does not easily allow modeling sub-skills or gathering information at a fine-grained level, due to its dependence on the assumption of, in general, one underlying trait. The local independence assumption in IRT implies an interest mainly in the student\’s overall ability on the subject of interest. When the goal is to improve students\’ learning, we are not just interested in efficiently arriving at their test score on a global subject. One wants a model that is able to map educational problems and talents in detail over the whole educational program, while allowing for dependency between items. The moment in time can cause some topics to be better mastered than others, and this is exactly what we want to get out of a model. The possibility of modeling flexible structures, estimating abilities at a very detailed level for sub-skills, and easily incorporating other variables such as feedback makes Bayesian networks a very promising method for making adaptive choices in personalized learning. This research shows how item and feedback selection can be performed with the help of Bayesian networks. A student involvement possibility is also introduced and evaluated.
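As a deliberately simplified illustration of the entropy-reduction idea referred to above (Plajner and Vomlel, 2016), the following sketch selects the next item for a single binary skill node; the item probabilities and the one-node network are hypothetical placeholders for a full Bayesian network.

import math

# hypothetical items: P(correct | skill mastered), P(correct | skill not mastered)
items = {"i1": (0.85, 0.30), "i2": (0.70, 0.20), "i3": (0.95, 0.60)}
p_mastered = 0.5                      # current posterior P(skill = mastered)

def entropy(p):
    return 0.0 if p in (0.0, 1.0) else -(p * math.log2(p) + (1 - p) * math.log2(1 - p))

def posterior(p, p1, p0, correct):
    # Bayes update of P(mastered) after observing a single response
    like1, like0 = (p1, p0) if correct else (1 - p1, 1 - p0)
    return p * like1 / (p * like1 + (1 - p) * like0)

def expected_entropy(p, p1, p0):
    # expected posterior entropy if this item were administered next
    p_correct = p * p1 + (1 - p) * p0
    return (p_correct * entropy(posterior(p, p1, p0, True))
            + (1 - p_correct) * entropy(posterior(p, p1, p0, False)))

best = min(items, key=lambda k: expected_entropy(p_mastered, *items[k]))
print("next item:", best)

The same logic carries over to a real network: the expected entropy is then computed over the joint posterior of the skill nodes rather than over a single node.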

References

Almond, R. G., Mislevy, R. J., Steinberg, L. S., Yan, D., \& Williamson, D. M. (2015). Bayesian Networks in Educational Assessment. New York: Springer Science+Business Media. http://doi.org/10.1007/978-0-387-98138-3

Eggen, T. J. H. M. (2012). Computerized adaptive testing item selection in computerized adaptive learning systems. In T. J. H. M. Eggen \& B. P. Veldkamp (Eds.), Psychometrics in practice at RCEC. Enschede: RCEC.

Netcoh, S. (2016, March). \“What do you mean by \‘personalized learning\’?\” Crosscutting Conversations in Education \– Research, Reflections \& Practice. Blog post.

Plajner, M., \& Vomlel, J. (2016). Student Skill Models in Adaptive Testing. In Proceedings of the Eighth International Conference on Probabilistic Graphical Models (pp. 403-414).

Van der Linden, W. J., \& Glas, C. A. (2000). Computerized adaptive testing: Theory and practice. Dordrecht: Kluwer Academic Publishers.

Session Video

}, keywords = {feedback selection, item selection, network approach, personalized learning}, author = {Nikky van Buuren and Hendrik Straat and Theo Eggen and Jean-Paul Fox} } @conference {2653, title = {Adaptivity in a Diagnostic Educational Test}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

During the past five years a diagnostic educational test for three subjects (writing Dutch, writing English, and math) has been developed in the Netherlands. The test informs students and their teachers about the students\’ strengths and weaknesses in such a manner that the learning process can be adjusted to their personal needs. It is a computer-based assessment for students in five different educational tracks midway through secondary education that can yield diagnoses of many sub-skills. One of the main challenges at the outset of the development was to devise a way to deliver many diagnoses within a reasonable testing time. The answer to this challenge was to make the DET adaptive.

In this presentation we will first discuss how the adaptivity is shaped towards the purpose of the Diagnostic Educational Test. The adaptive design, particularly working with item blocks, will be discussed, as well as the implemented adaptive rules. We will also show a simulation of different adaptive paths of students and some empirical information on the paths students took through the test.

Session Video

}, keywords = {CAT, Diagnostic tests, Education}, author = {Sanneke Schouwstra} } @conference {2658, title = {Analysis of CAT Precision Depending on Parameters of the Item Pool}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The purpose of this research project is to analyze the measurement precision of a latent variable depending on parameters of the item pool. The influence of the following factors is analyzed:

Factor A \– range of variation of items in the pool. This factor varies on three levels with the following ranges in logits: a1 \– [-3.0; +3.0], a2 \– [-4.0; +4.0], a3 \– [-5.0; +5.0].

Factor B \– number of items in the pool. The factor varies on six levels with the following number of items at each level: b1 \– 128, b2 \– 256, b3 \– 512, b4 \– 1024, b5 \– 2048, b6 \– 4096. The items are evenly distributed in each of the variation ranges.

Factor C \– examinees\’ proficiency varies at 30 levels (c1, c2, \…, c30), which are evenly distributed in the range [-3.0; +3.0] logit.

The investigation was based on a simulation experiment within the framework of the theory of latent variables.

Response Y is the precision of measurement of examinees\’ proficiency, which is calculated as the difference between the true levels of examinees\’ proficiency and the estimates obtained by means of adaptive testing. Three-factor ANOVA was used for data processing.

The following results were obtained:

1. Factor A is significant. Ceteris paribus, the greater the range of variation of items in the pool, the higher the estimation precision is.

2. Factor B is significant. Ceteris paribus, the greater the number of items in the pool, the higher the estimation precision is.

3. Factor C is statistically insignificant at level α = .05. This means that the precision of estimation of examinees\’ proficiency is the same across the range of their variation.

4. The only significant interaction among all interactions is AB. The significance of this interaction is explained by the fact that increasing the number of items in the pool decreases the effect of the range of variation of items in the pool. A minimal simulation sketch of this factorial design is given below.
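The sketch below shows one way such a three-factor simulation and ANOVA could be set up; the error model standing in for the full adaptive-testing simulation, the reduced set of pool sizes, and the number of replicates per cell are all assumptions made purely for illustration.

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

gen = np.random.default_rng(1)
ranges = {"a1": 3.0, "a2": 4.0, "a3": 5.0}        # Factor A: half-range of item difficulties (logits)
pool_sizes = {"b1": 128, "b2": 256, "b3": 512}    # Factor B: pool size (subset of the six levels)
thetas = np.linspace(-3.0, 3.0, 30)               # Factor C: 30 proficiency levels

rows = []
for a_lab, half in ranges.items():
    for b_lab, n_items in pool_sizes.items():
        for c_idx, theta in enumerate(thetas):
            for _ in range(20):                   # replicates per cell
                # ad hoc stand-in for a full CAT simulation: error shrinks with
                # a wider difficulty range and a larger pool
                err = gen.normal(0.0, 1.0 / np.sqrt(half * np.log2(n_items)))
                rows.append({"A": a_lab, "B": b_lab, "C": f"c{c_idx + 1}", "Y": abs(err)})

df = pd.DataFrame(rows)
model = ols("Y ~ C(A) * C(B) * C(C)", data=df).fit()
print(sm.stats.anova_lm(model, typ=2))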

Session Video

}, keywords = {CAT, Item parameters, Precision}, url = {https://drive.google.com/file/d/1Bwe58kOQRgCSbB8x6OdZTDK4OIm3LQI3/view?usp=drive_web}, author = {Anatoly Maslak and Stanislav Pozdniakov} } @article {2602, title = {Application of Binary Searching for Item Exposure Control in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {41}, number = {7}, year = {2017}, pages = {561-576}, abstract = {Cognitive diagnosis has emerged as a new generation of testing theory for educational assessment after the item response theory (IRT). One distinct feature of cognitive diagnostic models (CDMs) is that they assume the latent trait to be discrete instead of continuous as in IRT. From this perspective, cognitive diagnosis bears a close resemblance to searching problems in computer science and, similarly, item selection problem in cognitive diagnostic computerized adaptive testing (CD-CAT) can be considered as a dynamic searching problem. Previously, item selection algorithms in CD-CAT were developed from information indices in information science and attempted to achieve a balance among several objectives by assigning different weights. As a result, they suffered from low efficiency from a tug-of-war competition among multiple goals in item selection and, at the same time, put an undue responsibility of assigning the weights for these goals by trial and error on users. Based on the searching problem perspective on CD-CAT, this article adapts the binary searching algorithm, one of the most well-known searching algorithms in searching problems, to item selection in CD-CAT. The two new methods, the stratified dynamic binary searching (SDBS) algorithm for fixed-length CD-CAT and the dynamic binary searching (DBS) algorithm for variable-length CD-CAT, can achieve multiple goals without any of the aforementioned issues. The simulation studies indicate their performances are comparable or superior to the previous methods.}, doi = {10.1177/0146621617707509}, url = {https://doi.org/10.1177/0146621617707509}, author = {Chanjin Zheng and Chun Wang} } @article {2600, title = {ATS-PD: An Adaptive Testing System for Psychological Disorders}, journal = {Educational and Psychological Measurement}, volume = {77}, number = {5}, year = {2017}, pages = {792-815}, abstract = {The clinical assessment of mental disorders can be a time-consuming and error-prone procedure, consisting of a sequence of diagnostic hypothesis formulation and testing aimed at restricting the set of plausible diagnoses for the patient. In this article, we propose a novel computerized system for the adaptive testing of psychological disorders. The proposed system combines a mathematical representation of psychological disorders, known as the {\textquotedblleft}formal psychological assessment,{\textquotedblright} with an algorithm designed for the adaptive assessment of an individual{\textquoteright}s knowledge. The assessment algorithm is extended and adapted to the new application domain. 
Testing the system on a real sample of 4,324 healthy individuals, screened for obsessive-compulsive disorder, we demonstrate the system{\textquoteright}s ability to support clinical testing, both by identifying the correct critical areas for each individual and by reducing the number of posed questions with respect to a standard written questionnaire.}, doi = {10.1177/0013164416652188}, url = {https://doi.org/10.1177/0013164416652188}, author = {Ivan Donadello and Andrea Spoto and Francesco Sambo and Silvana Badaloni and Umberto Granziol and Giulio Vidotto} } @conference {2664, title = {Bayesian Perspectives on Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Although adaptive testing is usually treated from the perspective of maximum-likelihood parameter estimation and maximum-information item selection, a Bayesian perspective is more natural, statistically efficient, and computationally tractable. This observation not only holds for the core process of ability estimation but also extends to such processes as item calibration and real-time monitoring of item security. Key elements of the approach are parametric modeling of each relevant process, updating of the parameter estimates after the arrival of each new response, and optimal design of the next step.

The purpose of the symposium is to illustrate the role of Bayesian statistics in this approach. The first presentation discusses a basic Bayesian algorithm for the sequential update of any parameter in adaptive testing and illustrates the idea of Bayesian optimal design for the two processes of ability estimation and online item calibration. The second presentation generalizes the ideas to the case of adaptive testing with polytomous items. The third presentation uses the fundamental Bayesian idea of sampling from updated posterior predictive distributions (\“multiple imputations\”) to deal with the problem of scoring incomplete adaptive tests.
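A minimal sketch of the sequential updating idea for the ability-estimation process alone: the posterior over theta is kept on a grid and renormalized after each response under an assumed 2PL model (the item parameters below are hypothetical).

import numpy as np

grid = np.linspace(-4, 4, 161)
posterior = np.exp(-0.5 * grid ** 2)          # standard normal prior
posterior /= posterior.sum()

def update(posterior, a, b, correct):
    # multiply the current posterior by the 2PL likelihood of one response, then renormalize
    p = 1.0 / (1.0 + np.exp(-a * (grid - b)))
    posterior = posterior * (p if correct else 1.0 - p)
    return posterior / posterior.sum()

# e.g., a correct response to a hypothetical item with a = 1.2, b = 0.3
posterior = update(posterior, 1.2, 0.3, True)
theta_eap = float(np.sum(grid * posterior))                           # current point estimate
se_eap = float(np.sqrt(np.sum((grid - theta_eap) ** 2 * posterior)))  # posterior SD

In principle the same update-and-redesign cycle can be applied to item parameters during online calibration, which is the generalization the presentations address.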

Session Video 1

Session Video 2

}, keywords = {Bayesian Perspective, CAT}, author = {Wim J. van der Linden and Bingnan Jiang and Hao Ren and Seung W. Choi and Qi Diao} } @conference {2641, title = {Is CAT Suitable for Automated Speaking Test?}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

We have developed an automated scoring system for Japanese speaking proficiency, namely SJ-CAT (Speaking Japanese Computerized Adaptive Test), which has been operational for the last few months. One of the unique features of the test is that it is adaptive, based on polytomous IRT.

SJ-CAT consists of two sections: Section 1 has sentence reading-aloud tasks and multiple-choice reading tasks, and Section 2 has sentence generation tasks and open-answer tasks. In a reading-aloud task, a test taker reads a phoneme-balanced sentence on the screen after listening to a model reading. In a multiple-choice reading task, a test taker sees a picture and reads aloud the one sentence among three on the screen that describes the scene most appropriately. In a sentence generation task, a test taker sees a picture or watches a video clip and describes the scene in his/her own words for about ten seconds. In an open-answer task, the test taker expresses support for or opposition to, for example, nuclear power generation, with reasons, for about 30 seconds.

In the course of developing the test, we found many unexpected and unique characteristics of a speaking CAT that are not found in usual multiple-choice CATs. In this presentation, we will discuss some of these factors, which we had not noticed in our previous project of developing the dichotomous J-CAT (Japanese Computerized Adaptive Test), which consists of vocabulary, grammar, reading, and listening sections. Firstly, we will claim that the distribution of item difficulty parameters depends on the types of items: with an item pool of unrestricted item types, such as open questions, it is difficult to achieve an ideal distribution, either normal or uniform. Secondly, contrary to our expectations, open questions are not necessarily more difficult to operate in an automated scoring system than more restricted questions such as sentence reading, as long as one can set up a suitable algorithm for open-question scoring. Thirdly, we will show that the convergence of the standard deviation of the posterior distribution, or the standard error of the theta parameter, is faster in the polytomous IRT used for SJ-CAT than in the dichotomous IRT used in J-CAT. Fourthly, we will discuss problems in the equating of items in SJ-CAT, and suggest introducing deep learning with reinforcement learning instead of equating. Finally, we will discuss issues in operating SJ-CAT on the web, including speed of scoring, operation costs, and security, among others.

Session Video

}, keywords = {Automated Speaking Test, CAT, language testing}, author = {Shingo Imai} } @conference {2637, title = {Comparison of Pretest Item Calibration Methods in a Computerized Adaptive Test (CAT)}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Calibration methods for pretest items in a computerized adaptive test (CAT) are not a new area of research inquiry. After decades of research on CAT, the fixed item parameter calibration (FIPC) method has been widely accepted and used by practitioners to address two CAT calibration issues: (a) the restricted ability range each item is exposed to, and (b) a sparse response data matrix. In FIPC, the parameters of the operational items are fixed at their original values, and multiple expectation maximization (EM) cycles are used to estimate parameters of the pretest items, with the prior ability distribution being updated multiple times (Ban, Hanson, Wang, Yi, \& Harris, 2001; Kang \& Petersen, 2009; Pommerich \& Segall, 2003).

Another calibration method is the fixed person parameter calibration (FPPC) method proposed by Stocking (1988) as \“Method A.\” Under this approach, candidates\’ ability estimates are fixed in the calibration of pretest items and they define the scale on which the parameter estimates are reported. The logic of FPPC is suitable for CAT applications because the person parameters are estimated based on operational items and available for pretest item calibration. In Stocking (1988), the FPPC was evaluated using the LOGIST computer program developed by Wood, Wingersky, and Lord (1976). He reported that \“Method A\” produced larger root mean square errors (RMSEs) in the middle ability range than \“Method B,\” which required the use of anchor items (administered non-adaptively) and linking steps to attempt to correct for the potential scale drift due to the use of imperfect ability estimates.

Since then, new commercial software tools such as BILOG-MG and flexMIRT (Cai, 2013) have been developed to handle the FPPC method with different implementations (e.g., the MH-RM algorithm with flexMIRT). The performance of the FPPC method with those new software tools, however, has rarely been researched in the literature.

In our study, we evaluated the performance of two pretest item calibration methods using flexMIRT, the new software tool. The FIPC and FPPC are compared under various CAT settings. Each simulated exam contains 75\% operational items and 25\% pretest items, and real item parameters are used to generate the CAT data. This study also addresses the lack of guidelines in the existing CAT item calibration literature regarding population ability shift and exam length (more accurate theta estimates are expected in longer exams). Thus, this study investigates four factors and their impact on parameter estimation accuracy: (1) candidate population changes (3 ability distributions); (2) exam length (20: 15 OP + 5 PT; 40: 30 OP + 10 PT; and 60: 45 OP + 15 PT); (3) data model fit (3PL and 3PL with fixed c); and (4) pretest item calibration sample sizes (300, 500, and 1000). This study\’s findings will fill the gap in this area of research and thus provide new information on which practitioners can base their decisions when selecting a pretest calibration method for their exams.

References

Ban, J. C., Hanson, B. A., Wang, T., Yi, Q., \& Harris, D. J. (2001). A comparative study of online pretest item calibration/scaling methods in computerized adaptive testing. Journal of Educational Measurement, 38(3), 191\–212.

Cai, L. (2013). flexMIRT\® Flexible Multilevel Multidimensional Item Analysis and Test Scoring (Version 2) [Computer software]. Chapel Hill, NC: Vector Psychometric Group.

Kang, T., \& Petersen, N. S. (2009). Linking item parameters to a base scale (Research Report No. 2009\– 2). Iowa City, IA: ACT.

Pommerich, M., \& Segall, D.O. (2003, April). Calibrating CAT pools and online pretest items using marginal maximum likelihood methods. Paper presented at the annual meeting of the National Council on Measurement in Education, Chicago, IL.

Stocking, M. L. (1988). Scale drift in online calibration (Research Report No. 88\–28). Princeton, NJ: Educational Testing Service.

Wood, R. L., Wingersky, M. S., \& Lord, F. M. (1976). LOGIST: A computer program for estimating examinee ability and item characteristic curve parameters (RM76-6) [Computer program]. Princeton, NJ: Educational Testing Service.

Session Video

}, keywords = {CAT, Pretest Item Calibration}, author = {Huijuan Meng and Chris Han} } @conference {2632, title = {A Comparison of Three Empirical Reliability Estimates for Computerized Adaptive Testing}, booktitle = {IACAT 2017 conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Reliability estimates in computerized adaptive testing (CAT) are derived from estimated thetas and the standard errors of those estimates. In practice, the observed standard error (OSE) of an estimated theta can be obtained from the test information function for each examinee under item response theory (IRT). Unlike in classical test theory (CTT), OSEs in IRT are conditional values given each estimated theta, so those values must be marginalized to evaluate test reliability. The arithmetic mean, the harmonic mean, and Jensen equality were applied to marginalize OSEs and estimate CAT reliability. Based on the different marginalization methods, three empirical CAT reliabilities were compared with true reliability. Results showed that the three empirical CAT reliabilities underestimated true reliability at short test lengths (\< 40), whereas at long test lengths (\> 40) the magnitude of the CAT reliabilities followed the order Jensen equality, harmonic mean, and arithmetic mean. Specifically, Jensen equality overestimated true reliability across all conditions at test lengths above 50.
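The marginalization step can be sketched as follows under one plausible reading of the three methods; the theta estimates and conditional OSEs below are simulated stand-ins, and the exact definitions used in the study may differ.

import numpy as np

gen = np.random.default_rng(7)
theta_hat = gen.normal(0.0, 1.0, 1000)          # hypothetical final CAT theta estimates
se = gen.uniform(0.25, 0.45, theta_hat.size)    # hypothetical conditional OSEs, one per examinee

var_theta = np.var(theta_hat, ddof=1)

err_arith = np.mean(se ** 2)                    # arithmetic mean of SE^2
err_harm = 1.0 / np.mean(1.0 / se ** 2)         # harmonic mean of SE^2 (reciprocal of mean information)
err_jensen = np.mean(se) ** 2                   # (mean SE)^2, a Jensen-inequality-style variant

for label, err in [("arithmetic", err_arith), ("harmonic", err_harm), ("Jensen", err_jensen)]:
    reliability = var_theta / (var_theta + err)   # empirical reliability
    print(f"{label:10s} error variance {err:.3f}  reliability {reliability:.3f}")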

Session Video

}, keywords = {CAT, Reliability}, url = {https://drive.google.com/file/d/1gXgH-epPIWJiE0LxMHGiCAxZZAwy4dAH/view?usp=sharing}, author = {Dong Gi Seo} } @article {2601, title = {Is a Computerized Adaptive Test More Motivating Than a Fixed-Item Test?}, journal = {Applied Psychological Measurement}, volume = {41}, number = {7}, year = {2017}, pages = {495-511}, abstract = {Computer adaptive tests provide important measurement advantages over traditional fixed-item tests, but research on the psychological reactions of test takers to adaptive tests is lacking. In particular, it has been suggested that test-taker engagement, and possibly test performance as a consequence, could benefit from the control that adaptive tests have on the number of test items examinees answer correctly. However, previous research on this issue found little support for this possibility. This study expands on previous research by examining this issue in the context of a mathematical ability assessment and by considering the possible effect of immediate feedback of response correctness on test engagement, test anxiety, time on task, and test performance. Middle school students completed a mathematics assessment under one of three test type conditions (fixed, adaptive, or easier adaptive) and either with or without immediate feedback about the correctness of responses. Results showed little evidence for test type effects. The easier adaptive test resulted in higher engagement and lower anxiety than either the adaptive or fixed-item tests; however, no significant differences in performance were found across test types, although performance was significantly higher across all test types when students received immediate feedback. In addition, these effects were not related to ability level, as measured by the state assessment achievement levels. The possibility that test experiences in adaptive tests may not in practice be significantly different than in fixed-item tests is raised and discussed to explain the results of this and previous studies.}, doi = {10.1177/0146621617707556}, url = {https://doi.org/10.1177/0146621617707556}, author = {Guangming Ling and Yigal Attali and Bridgid Finn and Elizabeth A. Stone} } @conference {2655, title = {Computerized Adaptive Testing for Cognitive Diagnosis in Classroom: A Nonparametric Approach}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In the past decade, CDMs of educational test performance have received increasing attention among educational researchers (for details, see Fu \& Li, 2007, and Rupp, Templin, \& Henson, 2010). CDMs of educational test performance decompose the ability domain of a given test into specific skills, called attributes, each of which an examinee may or may not have mastered. The resulting attribute profile documents the individual\’s strengths and weaknesses within the ability domain. Cognitive diagnostic computerized adaptive testing (CD-CAT) has been suggested by researchers as a diagnostic tool for assessment and evaluation (e.g., Cheng \& Chang, 2007; Cheng, 2009; Liu, You, Wang, Ding, \& Chang, 2013; Tatsuoka \& Tatsuoka, 1997). While model-based CD-CAT is relatively well-researched in the context of large-scale assessments, this type of system has not received the same degree of development in small-scale settings, where it would be most useful. The main challenge is that the statistical estimation techniques successfully applied to parametric CD-CAT require large samples to guarantee the reliable calibration of item parameters and accurate estimation of examinees\’ attribute profiles. In response to the challenge, a nonparametric approach that does not require any parameter calibration, and thus can be used in small educational programs, is proposed. The proposed nonparametric CD-CAT relies on the same principle as the regular CAT algorithm, but uses the nonparametric classification method (Chiu \& Douglas, 2013) to assess and update the student\’s ability state while the test proceeds. Based on a student\’s initial responses, a neighborhood of candidate proficiency classes is identified, and items not characteristic of the chosen proficiency classes are precluded from being chosen next. The response to the next item then allows for an update of the skill profile, and the set of possible proficiency classes is further narrowed. In this manner, the nonparametric CD-CAT cycles through item administration and update stages until the most likely proficiency class has been pinpointed. The simulation results show that the proposed method outperformed the compared parametric CD-CAT algorithms and the differences were significant when the item parameter calibration was not optimal.
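A toy version of the nonparametric classification step (proximity to ideal response patterns in the sense of Chiu and Douglas, 2013) is sketched below; the Q-matrix, the conjunctive ideal-response rule, and the responses are hypothetical.

import numpy as np
from itertools import product

Q = np.array([[1, 0], [0, 1], [1, 1], [1, 0], [0, 1]])   # hypothetical Q-matrix (items x attributes)
profiles = np.array(list(product([0, 1], repeat=Q.shape[1])))

def ideal_response(profile, q_row):
    # conjunctive rule: the ideal response is 1 only if all required attributes are mastered
    return int(np.all(profile >= q_row))

def neighborhood(responses, administered):
    # Hamming distance between observed responses and each profile's ideal responses
    dists = []
    for alpha in profiles:
        ideal = np.array([ideal_response(alpha, Q[j]) for j in administered])
        dists.append(int(np.sum(ideal != responses)))
    dists = np.array(dists)
    return profiles[dists == dists.min()]                 # closest candidate proficiency classes

# after a correct response to items 0 and 2 and an incorrect response to item 1:
print(neighborhood(np.array([1, 0, 1]), [0, 1, 2]))

Items whose ideal responses do not discriminate among the remaining candidate classes can then be passed over in the next selection step, which is the narrowing behavior described above.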

References

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74, 619-632.

Cheng, Y., \& Chang, H. (2007). The modified maximum global discrimination index method for cognitive diagnostic CAT. In D. Weiss (Ed.) Proceedings of the 2007 GMAC Computerized Adaptive Testing Conference.

Chiu, C.-Y., \& Douglas, J. A. (2013). A nonparametric approach to cognitive diagnosis by proximity to ideal response patterns. Journal of Classification, 30, 225-250.

Fu, J., \& Li, Y. (2007). An integrative review of cognitively diagnostic psychometric models. Paper presented at the Annual Meeting of the National Council on Measurement in Education. Chicago, Illinois.

Liu, H., You, X., Wang, W., Ding, S., \& Chang, H. (2013). The development of computerized adaptive testing with cognitive diagnosis for an English achievement test in China. Journal of Classification, 30, 152-172.

Rupp, A. A., Templin, J. L., \& Henson, R. A. (2010). Diagnostic measurement: Theory, methods, and applications. New York: Guilford.

Tatsuoka, K. K., \& Tatsuoka, M. M. (1997). Computerized cognitive diagnostic adaptive testing: Effect on remedial instruction as empirical validation. Journal of Educational Measurement, 34, 3\–20.

Session Video

}, keywords = {CD-CAT, non-parametric approach}, author = {Yuan-Pei Chang and Chia-Yi Chiu and Rung-Ching Tsai} } @conference {2654, title = {Concerto 5 Open Source CAT Platform: From Code to Nodes}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Concerto 5 is the newest version of the Concerto open source R-based Computer-Adaptive Testing platform, which is currently used in educational testing and in clinical trials. In our quest to make CAT accessible to all, the latest version uses flowchart nodes to connect different elements of a test, so that CAT test creation is an intuitive high-level process that does not require writing code.

A test creator might connect an Info Page node to a Consent Page node, then to a CAT node, and finally to a Feedback node. After uploading their items, their test is done.

This talk will show the new flowchart interface, and demonstrate the creation of a CAT test from scratch in less than 10 minutes.

Concerto 5 also includes a new Polytomous CAT node, so CATs with Likert items can be easily created in the flowchart interface. This node is currently used in depression and anxiety tests in a clinical trial.

Session Video

}, keywords = {Concerto 5, Open Source CAT}, url = {https://drive.google.com/open?id=11eu1KKILQEoK5c-CYO1P1AiJgiQxX0E0}, author = {David Stillwell} } @conference {2649, title = {Considerations in Performance Evaluations of Computerized Formative Assessments}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Computerized adaptive instruments have been widely established and used in the context of summative assessments for purposes including licensure, admissions, and proficiency testing. The benefits of examinee-tailored examinations, which can provide estimates of performance that are more reliable and valid, have in recent years attracted a broader audience (e.g., patient-oriented outcomes, test prep). Formative assessments, which are most widely understood in their implementation as diagnostic tools, have recently started to expand to lesser-known areas of computerized testing, such as implementations of instructional designs aiming to maximize examinee learning through targeted practice.

Using a CAT instrument within the framework of evaluating repeated examinee performances (in such settings as quiz-bank practice, for example) poses unique challenges not germane to summative assessments. The scale on which item parameters (and subsequently examinee performance estimates such as maximum likelihood estimates) are determined usually does not take change over time into consideration. While vertical scaling features resolve the learning-acquisition problem, most content practice engines do not make use of explicit practice windows that could be vertically aligned. Alternatively, multidimensional (MIRT) and hierarchical (HIRT) item response theory models allow for the specification of random effects associated with change over time in examinees\’ skills, but they are often complex and require content and usage resources not often observed.

The research submitted for consideration simulated examinees\’ repeated variable-length quiz-bank practice in algebra using a 500-item 1-PL operational pool. The stability simulations sought to determine which rolling item-interval size would provide ability estimates giving the most informative insight into examinees\’ learning progression over time. Estimates were evaluated in terms of reduction in estimate uncertainty, bias, and RMSD relative to the true and total-item-based ability estimates. It was found that rolling intervals of 20-25 items provided the best reduction of uncertainty around the estimate without compromising the ability to provide informed performance estimates to students. However, while intervals of 20-25 items asymptotically tended to provide adequate estimates of performance, changes over shorter periods of time assessed with shorter quizzes could not be detected, as those changes would be suppressed in favor of the performance based on the full interval considered. Implications for infrastructure (such as recommendation engines), product, and scale development are discussed.
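A rough sketch of the rolling-interval estimation examined here, using a 1-PL model and an EAP estimator as stand-ins (the item difficulties, prior, and window length are placeholders):

import numpy as np

def eap(responses, difficulties, grid=np.linspace(-4, 4, 81)):
    # EAP ability estimate under a 1-PL model with a standard normal prior
    post = np.exp(-0.5 * grid ** 2)
    for u, b in zip(responses, difficulties):
        p = 1.0 / (1.0 + np.exp(-(grid - b)))
        post = post * (p if u else 1.0 - p)
    post = post / post.sum()
    return float(np.sum(grid * post))

def rolling_estimates(responses, difficulties, window=20):
    # after each quiz item, re-estimate ability from the most recent `window` responses
    return [eap(responses[max(0, t - window):t], difficulties[max(0, t - window):t])
            for t in range(1, len(responses) + 1)]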

Session Video

}, keywords = {algebra, Formative Assessment, Performance Evaluations}, author = {Michael Chajewski and John Harnisher} } @conference {2666, title = {Construction of Gratitude Scale Using Polytomous Item Response Theory Model}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Various studies have shown that gratitude is essential to increasing the happiness and quality of life of every individual. Unfortunately, research on gratitude has still received little attention, and there is no standardized measure for it. Existing gratitude scales were developed overseas and have not been adapted to the Indonesian cultural context. Moreover, scale development is generally performed with a classical test theory approach, which has some drawbacks. This research develops a gratitude scale using a polytomous item response theory (IRT) model, the partial credit model (PCM).

The pilot study results showed that the 44-item gratitude scale is reliable (α = 0.944) and valid (meeting both convergent and discriminant validity requirements). The pilot study results also showed that the gratitude scale satisfies the unidimensionality assumption.

Fitting the PCM showed that the gratitude scale fit the model. Of the 44 items, one did not fit and was eliminated. A second analysis of the remaining 43 items showed that they fit the model and were suitable for measuring gratitude. Differential item functioning (DIF) analysis showed that four items exhibited gender-based response bias. Thus, 39 items remain in the scale.
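For reference, the PCM category probabilities fitted here take the standard form

P(X_{ij} = x \mid \theta_i) = \frac{\exp\left(\sum_{k=1}^{x} (\theta_i - \delta_{jk})\right)}{\sum_{h=0}^{m_j} \exp\left(\sum_{k=1}^{h} (\theta_i - \delta_{jk})\right)},

where \theta_i is person i's gratitude level, \delta_{jk} is the k-th step parameter of item j, m_j is the item's maximum score, and the empty sum for x = 0 is defined as zero.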

Session Video

}, keywords = {Gratitude Scale, polytomous items}, url = {https://drive.google.com/open?id=1pHhO4cq2-wh24ht3nBAoXNHv7234_mjH}, author = {Nurul Arbiyah} } @conference {2657, title = {Developing a CAT: An Integrated Perspective}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Most resources on computerized adaptive testing (CAT) tend to focus on psychometric aspects such as mathematical formulae for item selection or ability estimation. However, development of a CAT assessment requires a holistic view of project management, financials, content development, product launch and branding, and more. This presentation will develop such a holistic view, which serves several purposes, including providing a framework for validity, estimating costs and ROI, and making better decisions regarding the psychometric aspects.

Thompson and Weiss (2011) presented a 5-step model for developing computerized adaptive tests (CATs). This model will be presented and discussed as the core of this holistic framework, and then applied to real-life examples. While most CAT research focuses on developing new quantitative algorithms, this presentation is instead intended to help researchers evaluate and select algorithms that are most appropriate for their needs. It is therefore ideal for practitioners who are familiar with the basics of item response theory and CAT, and wish to explore how they might apply these methodologies to improve their assessments.

Steps include:

1. Feasibility, applicability, and planning studies

2. Develop item bank content or utilize existing bank

3. Pretest and calibrate item bank

4. Determine specifications for final CAT

5. Publish live CAT.

So, for example, Step 1 will contain simulation studies which estimate item bank requirements, which then can be used to determine costs of content development, which in turn can be integrated into an estimated project cost timeline. Such information is vital in determining if the CAT should even be developed in the first place.

References

Thompson, N. A., \& Weiss, D. J. (2011). A Framework for the Development of Computerized Adaptive Tests. Practical Assessment, Research \& Evaluation, 16(1). Retrieved from http://pareonline.net/getvn.asp?v=16\&n=1.

Session Video

}, keywords = {CAT Development, integrated approach}, url = {https://drive.google.com/open?id=1Jv8bpH2zkw5TqSMi03e5JJJ98QtXf-Cv}, author = {Nathan Thompson} } @article {2615, title = {Development of a Computer Adaptive Test for Depression Based on the Dutch-Flemish Version of the PROMIS Item Bank}, journal = {Evaluation \& the Health Professions}, volume = {40}, number = {1}, year = {2017}, pages = {79-105}, abstract = {We developed a Dutch-Flemish version of the patient-reported outcomes measurement information system (PROMIS) adult V1.0 item bank for depression as input for computerized adaptive testing (CAT). As item bank, we used the Dutch-Flemish translation of the original PROMIS item bank (28 items) and additionally translated 28 U.S. depression items that failed to make the final U.S. item bank. Through psychometric analysis of a combined clinical and general population sample (N = 2,010), 8 added items were removed. With the final item bank, we performed several CAT simulations to assess the efficiency of the extended (48 items) and the original item bank (28 items), using various stopping rules. Both item banks resulted in highly efficient and precise measurement of depression and showed high similarity between the CAT simulation scores and the full item bank scores. We discuss the implications of using each item bank and stopping rule for further CAT development.}, doi = {10.1177/0163278716684168}, url = {https://doi.org/10.1177/0163278716684168}, author = {Gerard Flens and Niels Smits and Caroline B. Terwee and Joost Dekker and Irma Huijbrechts and Edwin de Beurs} } @conference {2668, title = {The Development of a Web-Based CAT in China}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive ability assessment has been widely used as a recruitment tool in hiring potential employees. Traditional cognitive ability tests face threats from item exposure and long administration times. In China especially, campus recruitment places a premium on short testing time and protection against cheating. Beisen, the biggest domestic online assessment software provider, developed a web-based CAT for cognitive ability that assesses verbal, quantitative, logical, and spatial ability in order to decrease testing time, improve assessment accuracy, and reduce threats from cheating and faking in online ability testing. The web-based test provides convenient testing for examinees, who can easily access the test via the internet by logging in to the test website at any time and place through any Internet-enabled device (e.g., laptops, iPads, and smartphones).

We designed the CAT around strategies for establishing the item bank, setting the starting point, item selection, scoring, and termination. Additionally, we paid close attention to administering the test via the web. For the CAT procedures, we employed online calibration to establish a stable and expanding item bank, and integrated maximum Fisher information, the a-stratified strategy, and randomization for item selection and for coping with item exposure. Fixed-length and variable-length strategies were combined in terminating the test. To deliver fluid web-based testing, we employed cloud computing techniques and carefully designed each computing process. Distributed computation was used for scoring, executing EAP estimation and item selection at high speed. Caching all items on the servers in advance helps shorten the process of loading items onto examinees\’ devices. Horizontally scalable cloud servers cope with high concurrency. The massive computation in item selection was converted into looking items up in a precomputed information matrix table.
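One plausible reading of the precomputed information-table lookup combined with maximum-information selection and randomization is sketched below; the 3PL information function, the grid resolution, and the randomesque choice among the top k items are assumptions rather than reported implementation details.

import numpy as np

D = 1.7
grid = np.linspace(-4, 4, 81)                      # theta grid for the lookup table

def fisher_info(a, b, c, theta):
    # 3PL item information
    p = c + (1 - c) / (1 + np.exp(-D * a * (theta - b)))
    return (D * a) ** 2 * (1 - p) / p * ((p - c) / (1 - c)) ** 2

def build_table(a, b, c):
    # info_table[i, t] = information of item i at grid point t, computed once and cached
    return np.stack([fisher_info(a[i], b[i], c[i], grid) for i in range(len(a))])

def select_item(info_table, theta_hat, administered, k=5, rng=np.random.default_rng()):
    t = int(np.argmin(np.abs(grid - theta_hat)))   # nearest grid point: a table lookup, no recomputation
    info = info_table[:, t].copy()
    info[list(administered)] = -np.inf             # exclude items already administered
    top = np.argsort(info)[-k:]                    # randomesque: choose at random among the k most informative
    return int(rng.choice(top))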

We examined average accuracy, bank usage, and computing performance under both laboratory and real testing conditions. In a test of almost 28,000 examinees, we found that bank usage averaged 50\%, and that 80\% of tests terminated at a test information of 10, with an average of 9.6. Under high concurrency, testing was unhindered, and scoring plus item selection took only 0.23 s per examinee on average.

Session Video

}, keywords = {China, Web-Based CAT}, author = {Chongli Liang and Danjun Wang and Dan Zhou and Peida Zhan} } @article {2597, title = {The Development of MST Test Information for the Prediction of Test Performances}, journal = {Educational and Psychological Measurement}, volume = {77}, number = {4}, year = {2017}, pages = {570-586}, abstract = {The current study proposes novel methods to predict multistage testing (MST) performance without conducting simulations. This method, called MST test information, is based on analytic derivation of standard errors of ability estimates across theta levels. We compared standard errors derived analytically to the simulation results to demonstrate the validity of the proposed method in both measurement precision and classification accuracy. The results indicate that the MST test information effectively predicted the performance of MST. In addition, the results of the current study highlighted the relationship among the test construction, MST design factors, and MST performance.}, doi = {10.1177/0013164416662960}, url = {http://dx.doi.org/10.1177/0013164416662960}, author = {Ryoungsun Park and Jiseon Kim and Hyewon Chung and Barbara G. Dodd} } @conference {2644, title = {DIF-CAT: Doubly Adaptive CAT Using Subgroup Information to Improve Measurement Precision}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Differential item functioning (DIF) is usually regarded as a test fairness issue in high-stakes tests. In low-stakes tests, it is more of an accuracy problem. However, in low-stakes tests, the same method, deleting items that demonstrate significant DIF, is still employed to treat DIF items. When political concerns are not important, such as in low-stakes tests and instruments that are not used to make decisions about people, deleting items might not be optimal. Computerized adaptive testing (CAT) is more and more frequently used in low-stakes tests. The DIF-CAT method evaluated in this research is designed to cope with DIF in a CAT environment. Using this method, item parameters are separately estimated for the focal group and the reference group in a DIF study, then CATs are administered based on different sets of item parameters for the focal and reference groups.

To evaluate the performance of the DIF-CAT procedure, it was compared in a simulation study to (1) deleting all the DIF items in a CAT bank and (2) ignoring DIF. A 300-item flat item bank and a 300-item peaked item bank were simulated using the three-parameter logistic IRT model with D = 1.7. In each bank, 40\% of the items showed DIF. The DIF size was 0.5 in b and/or a, while the original b values ranged from -3 to 3 and a values ranged from 0.3 to 2.1. Three types of DIF were considered: (1) uniform DIF caused by differences in b, (2) non-uniform DIF caused by differences in a, and (3) non-uniform DIF caused by differences in both a and b. 500 normally distributed simulees in each of the reference and focal groups were used in item parameter recalibration. In the Delete DIF method, only DIF-free items were calibrated. In the Ignore DIF method, all the items were calibrated using all simulees without differentiating the groups. In the DIF-CAT method, the DIF-free items were used as anchor items to estimate the item parameters for the focal and reference groups, and the item parameters from this recalibration were used. All simulees used the same item parameters in the Delete method and the Ignore method. CATs for simulees within the two groups used group-specific item parameters in the DIF-CAT method. In the CAT stage, 100 simulees were generated for each of the reference and focal groups, at each of six discrete θ levels ranging from -2.5 to 2.5. CAT test length was fixed at 40 items. Bias, average absolute difference, RMSE, standard error of the θ estimates, and person fit were used to compare the performance of the DIF methods. DIF item usage was also recorded for the Ignore method and the DIF-CAT method.
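The core mechanism, the same CAT algorithm driven by group-specific item parameters, can be sketched in a few lines (the parameter values are hypothetical; D = 1.7 as in the simulation):

import math

# the same item carries separately recalibrated (a, b, c) parameters for each group
item_params = {
    "reference": {"item17": (1.1, 0.40, 0.20)},
    "focal":     {"item17": (1.1, 0.90, 0.20)},   # uniform DIF: b shifted by 0.5
}

def prob_correct(group, item, theta, D=1.7):
    # 3PL response probability using the parameter set matching the examinee's group
    a, b, c = item_params[group][item]
    return c + (1 - c) / (1 + math.exp(-D * a * (theta - b)))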

Generally, the DIF-CAT method outperformed both the Delete method and the Ignore method in dealing with DIF items in CAT. The Delete method, which is the most frequently used method for handling DIF, performed the worst of the three methods in a CAT environment, as reflected in multiple indices of measurement precision. Even the Ignore method, which simply left DIF items in the item bank, provided θ estimates of higher precision than the Delete method. This poor performance of the Delete method was probably due to the reduction in the size of the item bank available for each CAT.

Session Video

}, keywords = {DIF-CAT, Doubly Adaptive CAT, Measurement Precision, subgroup information}, url = {https://drive.google.com/open?id=1Gu4FR06qM5EZNp_Ns0Kt3HzBqWAv3LPy}, author = {Joy Wang and David J. Weiss and Chun Wang} } @article {2580, title = {Dual-Objective Item Selection Criteria in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {54}, number = {2}, year = {2017}, pages = {165{\textendash}183}, abstract = {The development of cognitive diagnostic-computerized adaptive testing (CD-CAT) has provided a new perspective for gaining information about examinees{\textquoteright} mastery on a set of cognitive attributes. This study proposes a new item selection method within the framework of dual-objective CD-CAT that simultaneously addresses examinees{\textquoteright} attribute mastery status and overall test performance. The new procedure is based on the Jensen-Shannon (JS) divergence, a symmetrized version of the Kullback-Leibler divergence. We show that the JS divergence resolves the noncomparability problem of the dual information index and has close relationships with Shannon entropy, mutual information, and Fisher information. The performance of the JS divergence is evaluated in simulation studies in comparison with the methods available in the literature. Results suggest that the JS divergence achieves parallel or more precise recovery of latent trait variables compared to the existing methods and maintains practical advantages in computation and item pool usage.}, issn = {1745-3984}, doi = {10.1111/jedm.12139}, url = {http://dx.doi.org/10.1111/jedm.12139}, author = {Kang, Hyeon-Ah and Zhang, Susu and Chang, Hua-Hua} } @conference {2656, title = {Efficiency of Item Selection in CD-CAT Based on Conjunctive Bayesian Network Modeling Hierarchical attributes}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive diagnosis models (CDMs) aim to diagnose an examinee\’s mastery status on multiple fine-grained skills. As new cognitive diagnosis methods emerge, much attention has also been given to cognitive diagnostic computerized adaptive testing (CD-CAT). Topics such as item selection methods, item exposure control strategies, and online calibration methods, which have been well-studied for traditional item response theory (IRT) based CAT, are also investigated in the context of CD-CAT (e.g., Xu, Chang, \& Douglas, 2003; Wang, Chang, \& Huebner, 2011; Chen et al., 2012).

Within the CDM framework, some researchers suggest modeling the structural relationships between cognitive skills, or attributes. In particular, attributes can be hierarchical, such that some attributes must be acquired before subsequent ones can be mastered. For example, in mathematics, addition must be mastered before multiplication, which yields a hierarchy between the addition and multiplication skills. Recently, new CDMs that account for attribute hierarchies have been proposed, including the Attribute Hierarchy Method (AHM; Leighton, Gierl, \& Hunka, 2004) and the Hierarchical Diagnostic Classification Models (HDCM; Templin \& Bradshaw, 2014).

Bayesian networks (BNs), probabilistic graphical models that represent the relationships among a set of random variables using a directed acyclic graph with conditional probability distributions, also provide an efficient framework for modeling the relationships between attributes (Culbertson, 2016). Among the various BNs, the conjunctive Bayesian network (CBN; Beerenwinkel, Eriksson, \& Sturmfels, 2007) is a special kind of BN that assumes a partial ordering between the occurrences of events and conjunctive constraints between them.

In this study, we propose using the CBN for modeling attribute hierarchies and discuss the advantages of the CBN for CDM. We then explore the impact of CBN modeling on the efficiency of item selection methods for CD-CAT when the attributes are truly hierarchical. To this end, two simulation studies, one for fixed-length CAT and another for variable-length CAT, are conducted. For each study, two attribute hierarchy structures with 5 and 8 attributes are assumed. Among the various item selection methods developed for CD-CAT, six algorithms are considered: the posterior-weighted Kullback-Leibler index (PWKL; Cheng, 2009), the modified PWKL index (MPWKL; Kaplan, de la Torre, \& Barrada, 2015), Shannon entropy (SHE; Tatsuoka, 2002), mutual information (MI; Wang, 2013), the posterior-weighted CDM discrimination index (PWCDI; Zheng \& Chang, 2016), and the posterior-weighted attribute-level CDM discrimination index (PWACDI; Zheng \& Chang, 2016). The impact of Q-matrix structure, item quality, and test termination rules on the efficiency of the item selection algorithms is also investigated. Evaluation measures include attribute classification accuracy (fixed-length experiment) and the test length of CD-CAT until stopping (variable-length experiment).
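
As a rough illustration of one of the selection criteria named above, the sketch below implements Shannon entropy (SHE) item selection for a DINA-type CDM: the next item is the one that minimizes the expected Shannon entropy of the posterior over attribute patterns. The toy Q-matrix, guessing and slip values, and uniform prior are assumptions; under a CBN-style hierarchy, patterns violating the prerequisite ordering would simply receive prior probability zero.

import itertools
import numpy as np

def dina_p_correct(pattern, q_row, guess, slip):
    # DINA model: correct with prob 1 - slip if all required attributes are mastered
    required_met = np.all(pattern >= q_row)
    return (1.0 - slip) if required_met else guess

def expected_entropy(item, patterns, posterior, Q, guess, slip):
    # Expected Shannon entropy of the attribute posterior after administering `item`
    p1 = np.array([dina_p_correct(pat, Q[item], guess[item], slip[item])
                   for pat in patterns])
    exp_h = 0.0
    for p_x in (p1, 1.0 - p1):                 # x = 1 (correct), x = 0 (incorrect)
        marginal = float(np.sum(posterior * p_x))
        if marginal <= 0.0:
            continue
        post = posterior * p_x / marginal
        nz = post[post > 0]
        exp_h += marginal * (-np.sum(nz * np.log(nz)))
    return exp_h

def she_select(available, patterns, posterior, Q, guess, slip):
    # Pick the unused item that minimizes the expected posterior entropy
    return min(available, key=lambda j: expected_entropy(j, patterns, posterior,
                                                         Q, guess, slip))

# Attribute patterns for K = 5 attributes, with a uniform prior over patterns;
# a CBN/hierarchy would assign prior 0 to patterns violating the prerequisites
K = 5
patterns = np.array(list(itertools.product([0, 1], repeat=K)))
posterior = np.full(len(patterns), 1.0 / len(patterns))

Q = np.array([[1, 0, 0, 0, 0],
              [1, 1, 0, 0, 0]])               # toy Q-matrix for two items
guess = np.array([0.1, 0.1])
slip = np.array([0.1, 0.1])
next_item = she_select([0, 1], patterns, posterior, Q, guess, slip)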

The results of the study indicate that the efficiency of item selection is improved by directly modeling the attribute hierarchies using the CBN. The test length required to achieve the diagnosis probability threshold was reduced to 50-70\% for CBN-based CAT compared with CD-CAT assuming independence of attributes. The magnitude of improvement is greater when the cognitive model of the test includes more attributes and when the test length is shorter. We conclude by discussing how Q-matrix structure, item quality, and test termination rules affect the efficiency.

References

Beerenwinkel, N., Eriksson, N., \& Sturmfels, B. (2007). Conjunctive Bayesian networks. Bernoulli, 893-909.

Chen, P., Xin, T., Wang, C., \& Chang, H. H. (2012). Online calibration methods for the DINA model with independent attributes in CD-CAT. Psychometrika, 77(2), 201-222.

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74(4), 619-632.

Culbertson, M. J. (2016). Bayesian networks in educational assessment: the state of the field. Applied Psychological Measurement, 40(1), 3-21.

Kaplan, M., de la Torre, J., \& Barrada, J. R. (2015). New item selection methods for cognitive diagnosis computerized adaptive testing. Applied Psychological Measurement, 39(3), 167-188.

Leighton, J. P., Gierl, M. J., \& Hunka, S. M. (2004). The attribute hierarchy method for cognitive assessment: A variation on Tatsuoka's rule-space approach. Journal of Educational Measurement, 41(3), 205-237.

Tatsuoka, C. (2002). Data analytic methods for latent partially ordered classification models. Journal of the Royal Statistical Society: Series C (Applied Statistics), 51(3), 337-350.

Templin, J., \& Bradshaw, L. (2014). Hierarchical diagnostic classification models: A family of models for estimating and testing attribute hierarchies. Psychometrika, 79(2), 317-339.

Wang, C. (2013). Mutual information item selection method in cognitive diagnostic computerized adaptive testing with short test length. Educational and Psychological Measurement, 73(6), 1017-1035.

Wang, C., Chang, H. H., \& Huebner, A. (2011). Restrictive stochastic item selection methods in cognitive diagnostic computerized adaptive testing. Journal of Educational Measurement, 48(3), 255-273.

Xu, X., Chang, H., \& Douglas, J. (2003, April). A simulation study to compare CAT strategies for cognitive diagnosis. Paper presented at the annual meeting of National Council on Measurement in Education, Chicago.

Zheng, C., \& Chang, H. H. (2016). High-efficiency response distribution-based item selection algorithms for short-length cognitive diagnostic computerized adaptive testing. Applied Psychological Measurement, 40(8), 608-624.

Session Video

}, keywords = {CD-CAT, Conjuctive Bayesian Network Modeling, item selection}, url = {https://drive.google.com/open?id=1RbO2gd4aULqsSgRi_VZudNN_edX82NeD}, author = {Soo-Yun Han and Yun Joo Yoo} } @conference {2652, title = {Efficiency of Targeted Multistage Calibration Designs under Practical Constraints: A Simulation Study}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Calibration of an item bank for computerized adaptive testing requires substantial resources. In this study, we focused on two related research questions. First, we investigated whether the efficiency of item calibration under the Rasch model could be enhanced by calibration designs that optimize the match between item difficulty and student ability (Berger, 1991). To this end, we introduced targeted multistage calibration designs, a design type that combines traditional targeted calibration designs with multistage designs. Targeted multistage calibration designs consider ability-related background variables (e.g., grade in school), as well as performance (i.e., the outcome of a preceding test stage), when assigning students to suitable items.
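
A minimal sketch of the routing idea behind such a design is given below, assuming a hypothetical number-correct routing table: grade serves as the background variable and the stage-1 score determines the stage-2 calibration module. The module labels and cutoff values are purely illustrative and are not taken from the study.

def assign_stage2_module(grade, stage1_score, cutoffs_by_grade):
    # Route a student to an easy, medium, or hard stage-2 calibration module,
    # using grade (background variable) and the stage-1 number-correct score.
    low, high = cutoffs_by_grade[grade]
    if stage1_score < low:
        return "easy"
    if stage1_score < high:
        return "medium"
    return "hard"

# Illustrative routing table only; real cutoffs would come from the design
cutoffs_by_grade = {3: (4, 8), 4: (5, 9), 5: (6, 10)}
module = assign_stage2_module(grade=4, stage1_score=7,
                              cutoffs_by_grade=cutoffs_by_grade)   # -> "medium"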

Second, we explored how limited a priori knowledge about item difficulty affects the efficiency of both targeted calibration designs and targeted multistage calibration designs. When arranging items within a given calibration design, test developers need to know the item difficulties to locate the items optimally within the design. However, empirical information about item difficulty is usually unavailable before item calibration. Owing to these missing empirical data, test developers might fail to assign all items to the most suitable location within a calibration design.

Both research questions were addressed in a simulation study in which we varied the calibration design, as well as the accuracy of item distribution across the different booklets or modules within each design (i.e., the number of misplaced items). The results indicated that targeted multistage calibration designs were more efficient than ordinary targeted designs under optimal conditions. In particular, targeted multistage calibration designs provided more accurate estimates for very easy and very difficult items. Limited knowledge about item difficulty during test construction impaired the efficiency of all designs. The loss of efficiency was considerable for one of the two investigated targeted multistage calibration designs, whereas targeted designs were more robust.

References

Berger, M. P. F. (1991). On the efficiency of IRT models when applied to different sampling designs. Applied Psychological Measurement, 15(3), 293-306. doi:10.1177/014662169101500310

Session Video

}, keywords = {CAT, Efficiency, Multistage Calibration}, url = {https://drive.google.com/file/d/1ko2LuiARKqsjL_6aupO4Pj9zgk6p_xhd/view?usp=sharing}, author = {Stephanie Berger and Angela J. Verschoor and Theo Eggen and Urs Moser} } @conference {2671, title = {An Empirical Simulation Study Using mstR for MST Designs}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Multistage testing (MST) combines many of the benefits of adaptive and linear testing, and has recently become the most sought-after form of computerized testing in educational assessment. It is well suited to testing educational achievement and can be adapted to practical educational survey testing. However, operational implementations of MST designs involve many practical considerations, including costs and benefits. Practitioners therefore need to begin with simulations to evaluate candidate MST designs and their performance before implementation. mstR, a recently released open-source R package, supports researchers and practitioners in carrying out such MST simulations.

A conventional MST design has a three-stage module structure (i.e., the 1-2-3 design), but the composition of modules differs from one design to another (e.g., the 1-3 design). For advance planning of equivalence studies, this paper uses both the 1-2-3 design and the 1-3 design as MST structures. The paper evaluates these MST designs through simulations using the R package mstR. The empirical simulation study provides an introductory overview of mstR and describes what it offers, using different MST structures built from a 2PL item bank. Further comparisons show the advantages of the different MST designs (e.g., 1-2-3 versus 1-3) for different practical implementations.
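
Since mstR's own API is not reproduced here, the sketch below illustrates the underlying logic of a 1-2-3 design in plain Python under stated assumptions: 2PL modules centered at different difficulties, EAP scoring on a grid after each stage, and routing by fixed θ cutpoints. Module sizes, parameter ranges, and cutpoints are all illustrative.

import numpy as np

rng = np.random.default_rng(1)
grid = np.linspace(-4, 4, 81)
prior = np.exp(-0.5 * grid ** 2)
prior /= prior.sum()                            # standard-normal prior on the grid

def p_2pl(theta, a, b):
    return 1.0 / (1.0 + np.exp(-a * (theta - b)))

def eap(responses, a, b):
    # EAP estimate of theta from all items answered so far (2PL likelihood)
    like = np.ones_like(grid)
    for x, ai, bi in zip(responses, a, b):
        p = p_2pl(grid, ai, bi)
        like *= p if x == 1 else 1.0 - p
    post = like * prior
    post /= post.sum()
    return float(np.sum(grid * post))

def make_module(n_items, b_center):
    # Hypothetical module: (a, b) parameters drawn around a target difficulty
    return rng.uniform(0.8, 2.0, n_items), rng.normal(b_center, 0.3, n_items)

stage1 = make_module(7, 0.0)
stage2 = {"easy": make_module(7, -1.0), "hard": make_module(7, 1.0)}
stage3 = {"low": make_module(7, -1.5), "mid": make_module(7, 0.0),
          "high": make_module(7, 1.5)}

def route(theta_hat, cuts, labels):
    # Illustrative routing: pick the module whose theta interval contains theta_hat
    return labels[int(np.searchsorted(cuts, theta_hat))]

theta_true = 0.4
resp1 = rng.binomial(1, p_2pl(theta_true, *stage1))
theta1 = eap(resp1, *stage1)
mod2 = stage2[route(theta1, cuts=[0.0], labels=["easy", "hard"])]
resp2 = rng.binomial(1, p_2pl(theta_true, *mod2))
a12 = np.concatenate([stage1[0], mod2[0]])
b12 = np.concatenate([stage1[1], mod2[1]])
theta2 = eap(np.concatenate([resp1, resp2]), a12, b12)
mod3 = stage3[route(theta2, cuts=[-0.5, 0.5], labels=["low", "mid", "high"])]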

Running in the open-source statistical environment R, mstR provides a valuable simulation tool that psychologists, social scientists, and educational measurement specialists can apply to innovative future assessments and to the operational use of MST.

}, keywords = {mstR, multistage testing}, author = {Soo Lee} } @conference {2635, title = {Evaluation of Parameter Recovery, Drift, and DIF with CAT Data}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Parameter drift and differential item functioning (DIF) analyses are frequent components of a test maintenance plan. That is, after a test form is published, organizations will often calibrate post-publishing data at a later date to evaluate whether the performance of the items or the test has changed over time. For example, if item content is leaked, the items might gradually become easier over time, and item statistics or parameters can reflect this.

When tests are published under a computerized adaptive testing (CAT) paradigm, they are nearly always calibrated with item response theory (IRT). IRT calibrations assume that range restriction is not an issue; that is, each item is administered to examinees across a range of ability. CAT data violate this assumption. However, some organizations still wish to evaluate the continuing performance of their items from a DIF or drift perspective.

This presentation will evaluate just how inaccurate DIF and drift analyses might be on CAT data, using a Monte Carlo parameter recovery methodology. Known item parameters will be used to generate both linear and CAT data sets, which are then calibrated for DIF and drift. In addition, we will implement randomesque item exposure constraints in some CAT conditions, as this randomization somewhat alleviates the range restriction problem directly, but it is an empirical question whether this improves the parameter recovery calibrations.
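
For readers unfamiliar with the exposure control mentioned above, here is a minimal sketch of randomesque selection under the 2PL: instead of always administering the single most informative item, one of the k most informative unused items is chosen at random, which spreads item exposure and mildly relaxes range restriction. The parameter names and the k = 5 default are assumptions, not the presenters' settings.

import numpy as np

rng = np.random.default_rng(2)

def fisher_info_2pl(theta, a, b):
    # Fisher information of 2PL items at the current theta estimate
    p = 1.0 / (1.0 + np.exp(-a * (theta - b)))
    return a ** 2 * p * (1.0 - p)

def randomesque_select(theta_hat, a, b, administered, k=5):
    # Randomly pick one of the k most informative not-yet-administered items
    info = fisher_info_2pl(theta_hat, a, b)
    info[list(administered)] = -np.inf          # exclude items already used
    top_k = np.argsort(info)[::-1][:k]
    return int(rng.choice(top_k))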

Session Video

}, keywords = {CAT, DIF, Parameter Drift, Parameter Recovery}, url = {https://drive.google.com/open?id=1F7HCZWD28Q97sCKFIJB0Yps0H66NPeKq}, author = {Nathan Thompson and Jordan Stoeger} } @conference {2669, title = {FastCAT {\textendash} Customizing CAT Administration Rules to Increase Response Efficiency}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

A typical prerequisite for CAT administration is the existence of an underlying item bank completely covering the range of the trait being measured. When a bank fails to cover the full range of the trait, examinees who are close to the floor or ceiling will often never achieve a standard error cut-off, and examinees will be forced to answer items increasingly less relevant to their trait level. This scenario is fairly typical for many patients responding to patient-reported outcome measures (PROMs). For example, in the assessment of physical functioning, many item banks have a ceiling at about the 50th percentile. For most healthy patients, after a few items the only items remaining in the bank will represent decreasing ability (even though the patient has already indicated that they are at or above the mean for the population). Another example would be a patient with no pain taking a Pain CAT: they will probably answer \“Never\” to every succeeding item out to the maximum test length. For this project we sought to reduce patient burden, while maintaining test accuracy, by reducing CAT length using novel stopping rules.

We studied CAT administration assessment histories for patients who were administered Patient-Reported Outcomes Measurement Information System (PROMIS) CATs. In the PROMIS 1 Wave 2 Back Pain/Depression Study, CATs were administered to N = 417 cases assessed across 11 PROMIS domains. The original CAT administration rules were: start with a pre-identified item of moderate difficulty; administer a minimum of four items per case; stop when the estimated theta\’s SE declines to \< 0.3 OR a maximum of 12 items has been administered.

Original CAT. 12,622 CAT administrations were analyzed. CATs ranged in number of items administered from 4 to 12 items; 72.5\% were 4-item CATs. The second and third most frequently occurring CATs were 5-item (n=1102; 8.7\%) and 12-item CATs (n=964; 7.6\%). A total of 64,062 items were administered, averaging 5.1 items per CAT. Customized CAT. Three new CAT stopping rules were introduced, each with the potential to increase item-presentation efficiency while maintaining the required score precision: stop if a case responds to the first two items administered using an \“extreme\” response category (towards the ceiling or floor of the item bank); administer a minimum of two items per case; stop if the change in the SE estimate (from the previous to the current item administration) is positive but \< 0.01.
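
A minimal sketch of how these stopping rules could be combined is given below; the function signature, the code values for \“extreme\” responses, and the retained original rules (SE \< 0.3, 12-item maximum) follow the description above, but all names and defaults are illustrative rather than the operational implementation.

def should_stop(responses, se_history, extreme_codes, max_items=12, min_items=2):
    # Return True when any FastCAT-style stopping rule fires (illustrative sketch).
    # responses     -- response category codes in administration order
    # se_history    -- SEM of the theta estimate after each administered item
    # extreme_codes -- category codes counted as "extreme" (floor/ceiling) responses
    n = len(responses)
    if n < min_items:
        return False
    if n >= max_items:
        return True
    if se_history[-1] < 0.3:                       # original precision rule
        return True
    if n >= 2 and all(r in extreme_codes for r in responses[:2]):
        return True                                # two extreme responses up front
    if n >= 2:
        delta = se_history[-1] - se_history[-2]
        if 0 < delta < 0.01:                       # SE no longer improving
            return True
    return False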

The three new stopping rules reduced the total number of items administered by 25,643, to 38,419 items (a 40.0\% reduction). After four items were administered, only n=1,824 CATs (14.5\%) were still in assessment mode (vs. n=3,477, or 27.5\%, in the original CATs). On average, cases completed 3.0 items per CAT (vs. 5.1).

Each new rule addressed a specific inefficiency in the original CAT administration process: cases not having, or possessing only a low or clinically unimportant level of, the assessed domain; allowing the SE \< 0.3 stopping criterion to come into effect earlier in the CAT administration process; and cases experiencing poor measurement from the domain item bank (e.g., \“floor\” and \“ceiling\” cases).

}, keywords = {Administration Rules, Efficiency, FastCAT}, url = {https://drive.google.com/open?id=1oPJV-x0p9hRmgJ7t6k-MCC1nAoBSFM1w}, author = {Richard C. Gershon} } @conference {2663, title = {From Blueprints to Systems: An Integrated Approach to Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

For years, test blueprints have told test developers how many items and what types of items will be included in a test. Adaptive testing adopted this approach from paper testing, and it is reasonably useful. Unfortunately, \“how many items and what types of items\” are not the only elements one should consider when choosing items for an adaptive test. To fill the gaps, practitioners have developed tools to allow an adaptive test to behave appropriately (i.e., examining exposure control, content balancing, item drift procedures, etc.). Each of these tools involves the use of a separate process external to the primary item selection process.

The use of these subsidiary processes makes item selection less optimal and makes it difficult to prioritize aspects of selection. This discussion describes systems-based adaptive testing. This approach uses metadata concerning items, test takers, and test elements to select items. These elements are weighted by the stakeholders to shape an expanded blueprint designed for adaptive testing.

Session Video

}, keywords = {CAT, integrated approach, Keynote}, url = {https://drive.google.com/open?id=1CBaAfH4ES7XivmvrMjPeKyFCsFZOpQMJ}, author = {Gage Kingsbury and Tony Zara} } @conference {2631, title = {Generating Rationales to Support Formative Feedback in Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Computer adaptive testing offers many important benefits to support and promote life-long learning. Computers permit testing on demand, thereby allowing students to take the test at any time during instruction; items on computerized tests are scored immediately, thereby providing students with instant feedback; and computerized tests permit continuous administration, thereby allowing students more choice about when they write their exams. But despite these important benefits, the advent of computer adaptive testing has also raised formidable challenges, particularly in the area of item development. Educators must have access to large numbers of diverse, high-quality test items to implement computerized adaptive testing because items are continuously administered to students. Hence, hundreds or even thousands of items are needed to develop the item banks necessary for computer adaptive testing. Unfortunately, educational test items, as they are currently created, are time-consuming and expensive to develop because each individual item is initially written by a content specialist and then reviewed, edited, and revised by groups of content specialists to ensure the items yield reliable and valid information. Hence, item development is one of the most important problems that must be solved before we can migrate to computer adaptive testing to support life-long learning, because large numbers of high-quality, content-specific test items are required.

One promising item development method that may be used to address this challenge is automatic item generation. Automatic item generation is a relatively new but rapidly evolving research area where cognitive and psychometric modelling practices are used to produce hundreds of new test items with the aid of computer technology. The purpose of our presentation is to describe a new methodology for generating both the items and the rationales required to solve each generated item, in order to produce the feedback needed to support life-long learning. Our item generation methodology will first be described. To ensure our description is practical, the method will also be illustrated using generated items from the health sciences, demonstrating how item generation can promote life-long learning for medical educators and practitioners.

Session Video

}, keywords = {Adaptive Testing, formative feedback, Item generation}, url = {https://drive.google.com/open?id=1O5KDFtQlDLvhNoDr7X4JO4arpJkIHKUP}, author = {Mark Gierl and Okan Bulut} } @conference {2647, title = {Grow a Tiger out of Your CAT }, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The main focus in the community of test developers and researchers is on improving adaptive test procedures and methodologies. Yet, the transition from research projects to larger-scale operational CATs is facing its own challenges. Usually, these operational CATs find their origin in government tenders. \“Scalability\”, \“Interoperability\” and \“Transparency\” are three keywords often found in these documents. Scalability is concerned with parallel system architectures which are based upon stateless selection algorithms. Design capacities often range from 10,000 to well over 100,000 concurrent students. Interoperability is implemented in standards like QTI, standards that were not designed with adaptive testing in mind. Transparency is being realized by open source software: the adaptive test should not be a black box. These three requirements often complicate the development of an adaptive test, or sometimes even conflict.

Session Video

}, keywords = {interoparability, Scalability, transparency}, author = {Angela Verschoor} } @article {2519, title = {Heuristic Constraint Management Methods in Multidimensional Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {77}, number = {2}, year = {2017}, pages = {241-262}, abstract = {Although multidimensional adaptive testing (MAT) has been proven to be highly advantageous with regard to measurement efficiency when several highly correlated dimensions are measured, there are few operational assessments that use MAT. This may be due to issues of constraint management, which is more complex in MAT than it is in unidimensional adaptive testing. Very few studies have examined the performance of existing constraint management methods (CMMs) in MAT. The present article focuses on the effectiveness of two promising heuristic CMMs in MAT for varying levels of imposed constraints and for various correlations between the measured dimensions. Through a simulation study, the multidimensional maximum priority index (MMPI) and multidimensional weighted penalty model (MWPM), as an extension of the weighted penalty model, are examined with regard to measurement precision and constraint violations. The results show that both CMMs are capable of addressing complex constraints in MAT. However, measurement precision losses were found to differ between the MMPI and MWPM. While the MMPI appears to be more suitable for use in assessment situations involving few to a moderate number of constraints, the MWPM should be used when numerous constraints are involved.}, doi = {10.1177/0013164416643744}, url = {http://dx.doi.org/10.1177/0013164416643744}, author = {Sebastian Born and Andreas Frey} } @conference {2627, title = {How Adaptive is an Adaptive Test: Are all Adaptive Tests Adaptive?}, booktitle = {2017 IACAT Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

There are many different kinds of adaptive tests, but they all have the characteristic that some feature of the test is customized to the purpose of the test. In the time allotted, it is impossible to consider all of these types of adaptation, so this address will focus on the \“classic\” adaptive test that matches the difficulty of the test to the capabilities of the person being tested. This address will first present information on the maximum level of adaptation that can occur and then compare the amount of adaptation that typically occurs on an operational adaptive test to that maximum. An index is proposed to summarize the amount of adaptation, and it is argued that this type of index should be reported for operational adaptive tests to show the amount of adaptation that typically occurs.

Click for Presentation Video

}, keywords = {Adaptive Testing, CAT}, url = {https://drive.google.com/open?id=1Nj-zDCKk3DvHA4Jlp1qkb2XovmHeQfxu}, author = {Mark D Reckase} } @conference {2642, title = {The Implementation of Nationwide High Stakes Computerized (adaptive) Testing in the Netherlands}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In this presentation, the challenges of implementing (adaptive) digital testing in the Facet system in the Netherlands are discussed. The Netherlands has a long tradition of implementing adaptive testing in educational settings. Since the late 1990s, adaptive testing has been used, mostly in low-stakes testing. Several CATs were implemented in student monitoring systems for primary education and in the general subjects of language and arithmetic in vocational education. The only nationwide high-stakes CAT implemented so far is the WISCAT-pabo: an arithmetic test for students in the first year of primary school teacher colleges. The psychometric advantages of item-based adaptive testing, such as efficiency and high measurement precision, are obvious. But there are also some disadvantages, such as the impossibility of reviewing items during and after the test. During the test, the student is not in control of his own test; e.g., he can only navigate forward to the next item. This is one of the reasons that other methods of testing, such as multistage testing, with adaptivity not at the item level but at the subtest level, have become more popular for high-stakes testing.

A main challenge of computerized (adaptive) testing is the implementation of the item bank and the test workflow in a digital system. In 2014, a new nationwide digital system (Facet) was introduced in the Netherlands, with connections to the digital systems of different parties based on international standards (LTI and QTI). The first nationwide tests in the Facet system were flexible exams in Dutch and arithmetic for vocational (and secondary) education, taken as item response theory-based equated linear multiple-forms tests, which are administered during five periods per year. Nowadays there are implementations of different methods of (multistage) adaptive testing in the same Facet system (DTT and Acet).

At this conference, other presenters from Cito will elaborate on the psychometric characteristics of these other adaptive testing methods. In this contribution, the system architecture and interoperability of the Facet system will be explained. The emphasis is on the implementation and the problems to be solved when using this digital system in all phases of the (adaptive) testing process: item banking, test construction, design, publication, test taking, analysis, and reporting to the student. An evaluation of the use of the system will be presented.

Session Video

}, keywords = {High stakes CAT, Netherlands, WISCAT}, url = {https://drive.google.com/open?id=1Kn1PvgioUYaOJ5pykq-_XWnwDU15rRsf}, author = {Mia van Boxel and Theo Eggen} } @conference {2672, title = {An Imputation Approach to Handling Incomplete Computerized Tests}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

As technology advances, computerized adaptive testing (CAT) is becoming increasingly popular as it allows tests to be tailored to an examinee\’s ability. Nevertheless, examinees might devise testing strategies to use CAT to their advantage. For instance, if only the items that examinees answer count towards their score, then a higher theta score might be obtained by spending more time on items at the beginning of the test and skipping items at the end if time runs out. This type of gaming can be discouraged if examinees\’ scores are lowered or \“penalized\” based on the amount of non-response.

The goal of this study was to devise a penalty function that would meet two criteria: 1) the greater the omit rate, the greater the penalty, and 2) examinees with the same ability and the same omit rate should receive the same penalty. To create the penalty, theta was first calculated based on only the items the examinee responded to. Next, the expected number correct score (EXR) was obtained using this theta and the test characteristic curve. A penalized expected number correct score was obtained by multiplying EXR by the proportion of items the examinee responded to. Finally, the penalized theta was identified using the test characteristic curve. Based on the penalized theta and the item parameters of an unanswered item, the likelihood of a correct response is computed and employed to estimate the imputed score for the unanswered item.
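
The following sketch works through that penalty numerically under the 2PL (an assumption; the abstract does not state the model), using a grid to invert the monotone test characteristic curve. Function and variable names are illustrative only.

import numpy as np

def p_2pl(theta, a, b):
    return 1.0 / (1.0 + np.exp(-a * (theta - b)))

def tcc(theta, a, b):
    # Test characteristic curve: expected number-correct score at theta
    return p_2pl(theta, a, b).sum()

def penalized_theta(theta_resp, n_answered, n_total, a, b):
    # Shrink the expected score by the completion rate, then map it back
    # to theta through the (monotone) test characteristic curve
    exr = tcc(theta_resp, a, b)                   # expected number correct
    exr_pen = exr * (n_answered / n_total)        # penalized expected score
    grid = np.linspace(-4, 4, 801)
    tcc_grid = np.array([tcc(t, a, b) for t in grid])
    return float(np.interp(exr_pen, tcc_grid, grid))   # numeric TCC inversion

def imputed_scores(theta_pen, a_unanswered, b_unanswered):
    # Imputed (fractional) scores for unanswered items at the penalized theta
    return p_2pl(theta_pen, a_unanswered, b_unanswered)

# Illustrative use with four items, the last of which was left unanswered
a = np.array([1.2, 0.9, 1.5, 1.1])
b = np.array([-0.5, 0.2, 0.8, 1.5])
theta_p = penalized_theta(theta_resp=0.6, n_answered=3, n_total=4, a=a, b=b)
scores = imputed_scores(theta_p, a[3:], b[3:])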

Two datasets were used to generate tests with completion rates of 50\%, 80\%, and 90\%. The first dataset included real data in which approximately 4,500 examinees responded to a 21-item test, which provided a baseline/truth; sampling was done to achieve the three completion-rate conditions. The second dataset consisted of simulated item scores for 50,000 simulees under a 1-2-4 multistage CAT design in which each stage contained seven items. Imputed item scores for unanswered items were computed using a variety of values for G (and therefore T). Three other approaches to handling unanswered items were also considered: all correct (i.e., T = 0), all incorrect (i.e., T = 1), and random scoring (i.e., T = 0.5).

The current study investigated the impact on theta estimates resulting from the proposed approach to handling unanswered items in a fixed-length CAT. In real testing situations, when examinees do not finish a test, it is hard to tell whether they tried diligently but ran out of time or whether they attempted to manipulate the scoring engine. To handle unfinished tests with penalties, the proposed approach considers examinees\’ abilities and incompletion rates. The results of this study provide direction for psychometric practitioners when considering penalties for omitted responses.

Session Video

}, keywords = {CAT, imputation approach, incomplete computerized test}, url = {https://drive.google.com/open?id=1vznZeO3nsZZK0k6_oyw5c9ZTP8uyGnXh}, author = {Troy Chen and Chi-Yu Huang and Chunyan Liu} } @article {2608, title = {The Information Product Methods: A Unified Approach to Dual-Purpose Computerized Adaptive Testing}, journal = {Applied Psychological MeasurementApplied Psychological Measurement}, volume = {42}, year = {2017}, month = {2018/06/01}, pages = {321 - 324}, abstract = {This article gives a brief summary of major approaches in dual-purpose computerized adaptive testing (CAT) in which the test is tailored interactively to both an examinee?s overall ability level, ?, and attribute mastery level, α. It also proposes an information product approach whose connections to the current methods are revealed. An updated comprehensive empirical study demonstrated that the information product approach not only can offer a unified framework to connect all other approaches but also can mitigate the weighting issue in the dual-information approach.}, isbn = {0146-6216}, url = {https://doi.org/10.1177/0146621617730392}, author = {Zheng, Chanjin and He, Guanrui and Gao, Chunlei} } @conference {2634, title = {Issues in Trait Range Coverage for Patient Reported Outcome Measure CATs - Extending the Ceiling for Above-average Physical Functioning}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The use of a measure that fails to cover the upper range of functioning may produce results that can lead to serious misinterpretation. Scores produced by such a measure may fail to register significant improvement, or may not be able to demonstrate functioning commensurate with an important milestone. Accurate measurement of this range is critical for the assessment of physically active adults, e.g., athletes recovering from injury and active military personnel who wish to return to active service. Conversely, a PF measure with a low ceiling might fail to differentiate patients in rehabilitation who continue to improve but whose scores plateau because of the ceiling of the measure used.

The assessment of physical function (PF) has greatly benefited from modern psychometric theory and the resulting scales, such as the Patient-Reported Outcomes Measurement Information System (PROMIS\®) PF instruments. While PROMIS PF has extended the range of function upwards relative to older \“legacy\” instruments, few PROMIS PF items assess high levels of function. We report here on the development of higher-functioning items for the PROMIS PF bank.

An expert panel representing orthopedics, sports/military medicine, and rehabilitation reviewed existing instruments and wrote new items. After internal review, cognitive interviews were conducted with 24 individuals of average and high levels of physical function. The remaining candidate items were administered, along with 50 existing PROMIS anchor items, to an internet panel screened for low, average, and high levels of physical function (N = 1,600), as well as to members of Boston-area gyms (N = 344). The resulting data were subjected to standard psychometric analysis, along with multiple linking methods to place the new items on the existing PF metric. The new items were added to the full PF bank for simulated computerized adaptive testing (CAT).

Item response data were collected on 54 candidate items. Items that exhibited local dependence (LD) or differential item functioning (DIF) related to gender, age, race, education, or PF status were removed from consideration. Of the 50 existing PROMIS PF items, 31 were free of DIF and LD and were used as anchors. The parameters for the remaining new candidate items were estimated twice: freely estimated and then linked with transformation coefficients, and with fixed-anchor calibration. Both methods were comparable and had appropriate fit. The new items were added to the full PF bank for simulated CATs. The resulting CAT was able to extend the ceiling with high precision to a T-score of 68, suggesting accurate measurement for 97\% of the general population.

Extending the range of items by which PF is measured will substantially improve measurement quality, applicability, and efficiency. The bank has incorporated these extension items and is available for use in research and clinics for brief CAT administration (see www.healthmeasures.net). Future research projects should focus on recovery trajectories of the measure for individuals with above average function who are recovering from injury.

Session Video

}, keywords = {CAT, Issues, Patient Reported Outcome}, url = {https://drive.google.com/open?id=1ZC02F-dIyYovEjzpeuRdoXDiXMLFRuKb}, author = {Richard C. Gershon} } @conference {2650, title = {Item Parameter Drifting and Online Calibration}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Item calibration is one of the most important topics in item response theory (IRT). Since many large-scale testing programs have switched from a paper-and-pencil (P\&P) testing mode to a computerized adaptive testing (CAT) mode, developing methods for efficiently calibrating new items has become vital. Among the many item calibration processes proposed for CAT, online calibration is the most cost-effective. This presentation introduces an online (re)calibration design to detect item parameter drift in CAT in both unidimensional and multidimensional environments. Specifically, for optimal online calibration design in unidimensional CAT, a two-stage design is proposed that implements a proportional density index algorithm. For multidimensional CAT, a four-quadrant online calibration pretest item selection design with the proportional density index algorithm is proposed. Comparisons were made between different online calibration item selection strategies. Results showed that, under unidimensional CAT, the proposed modified two-stage item selection criterion with the proportional density algorithm outperformed the other existing methods in terms of item parameter calibration and item parameter drift detection, and that, under multidimensional CAT, the online (re)calibration technique with the proposed four-quadrant item selection design and proportional density index outperformed the other methods.

Session Video

}, keywords = {online calibration, Parameter Drift}, author = {Hua-Hua Chang and Rui Guo} } @conference {2646, title = {Item Pool Design and Evaluation}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Early work on CAT tended to use existing sets of items that came from fixed-length test forms. These sets of items were selected to meet requirements quite different from those needed for a CAT, such as decision making or covering a content domain. However, there was also some early work suggesting that items be equally distributed over the range of proficiency of interest or concentrated at a decision point, and some work showing that proficiency estimates are biased when an item pool is too easy or too hard. These early findings eventually led to work on item pool design and, more recently, on item pool evaluation. This presentation gives a brief overview of these topics to provide some context for the following presentations in this symposium.

Session Video

}, keywords = {CAT, Item Pool Design}, url = {https://drive.google.com/open?id=1ZAsqm1yNZlliqxEHcyyqQ_vOSu20xxZs}, author = {Mark D Reckase and Wei He and Jing-Ru Xu and Xuechun Zhou} } @conference {2662, title = {Item Response Time on Task Effect in CAT}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Introduction. In addition to reduced test length and increased measurement efficiency, computerized adaptive testing (CAT) can provide new insights into the cognitive process of task completion that cannot be mined via conventional tests. Response time is a primary characteristic of the task completion procedure. It has the potential to inform us about underlying processes. In this study, the relationship between response time and response accuracy will be investigated.

Hypothesis. The present study argues that the relationship between response time on task and response accuracy, which may be positive, negative, or curvilinear, depends on the cognitive nature of the task items, holding the ability of the subjects and the difficulty of the items constant. The interpretations of these associations are not uniform either.

Research question. Is there a homogeneous effect of response time on test outcome across Graduate Record Examination (GRE) quantitative and verbal items?

Proposed explanations. If the accuracy of cognitive test responses decreases with response time, then this indicates that the underlying cognitive process is a degrading process such as knowledge retrieval: more accessible knowledge can be retrieved faster than less accessible knowledge, and it is inherent to knowledge retrieval that the success rate declines with elapsing response time. For instance, in reading tasks, the time-on-task effect is negative, and the more negative it is, the easier the task. However, if the accuracy of cognitive test responses increases with response time, then the process is of an upgrading nature, with an increasing success rate as a function of response time. For example, problem solving takes time, and fast responses are less likely to be well-founded responses. It is of course also possible that the relationship is curvilinear, as when an increasing success rate is followed by a decreasing success rate or vice versa.

Methodology. The data are from computer-based GRE quantitative and verbal tests and will be analyzed within the generalized linear mixed models (GLMM) framework, controlling for ability and item difficulty as possible confounding factors. A linear model here means a linear combination of predictors determining the probability that person p answers item i correctly. These models are equivalent to advanced IRT models that go beyond the regular modeling of test responses in terms of one or more latent variables and item parameters. The lme4 package for R will be used to carry out the statistical computations.
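
As a rough, simplified analogue of that analysis (fixed effects only, simulated data, and Python's statsmodels instead of the lme4 GLMM named above), the sketch below regresses response accuracy on log response time while controlling for ability and item difficulty. All coefficients and variable names are assumptions for illustration.

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(3)
n = 500
df = pd.DataFrame({
    "ability":    rng.normal(size=n),
    "difficulty": rng.normal(size=n),
    "log_rt":     rng.normal(3.5, 0.5, size=n),   # log response time on task
})

# Simulate accuracy with a negative time-on-task effect (illustrative values)
lin_pred = 1.0 * df.ability - 1.0 * df.difficulty - 0.8 * (df.log_rt - 3.5)
df["correct"] = rng.binomial(1, 1.0 / (1.0 + np.exp(-lin_pred)))

# Fixed-effects approximation of the time-on-task effect; the study itself
# fits a GLMM with person/item random effects using lme4 in R
model = smf.logit("correct ~ log_rt + difficulty + ability", data=df).fit()
print(model.params)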

Implications. The right amount of testing time in CAT is important: too much is wasteful and costly, while too little impacts score validity. The study is expected to provide a new perspective on the relationship between response time and response accuracy, which in turn will contribute to a better understanding of time effects and the relevant cognitive processes in CAT.

Session Video

}, keywords = {CAT, Response time, Task effect}, author = {Yang Shi} } @conference {2629, title = {Item Selection Strategies for Developing CAT in Indonesia}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niiagata Seiryo University}, organization = {Niiagata Seiryo University}, address = {Niigata Japan}, abstract = {

The recent development of computerized testing in Indonesia is quite promising. Many government institutions have used the technology for recruitment. Since the Indonesian Army acknowledged the benefits of computerized adaptive testing (CAT) over conventional test administration, the issue of selecting the first item has attracted attention. Given CAT\’s basic philosophy, several methods can be used to select the first item, such as using educational level, ability estimates from item simulation, or other approaches. The question remains how to apply these methods most effectively in the context of constrained adaptive testing. This paper reviews such strategies as they appear in the relevant literature, focusing on studies that evaluate the effectiveness of first-item selection strategies for dichotomous scoring. It also discusses the strengths and weaknesses of each group of strategies using examples from simulation studies. No new research is presented; rather, a compendium of models is reviewed to give newcomers a broad view of first-item selection strategies.

}, keywords = {CAT, Indonesia, item selection strategies}, url = {https://www.youtube.com/watch?v=2KuFrRATq9Q}, author = {Istiani Chandra} } @article {2614, title = {Item usage in a multidimensional computerized adaptive test (MCAT) measuring health-related quality of life}, journal = {Quality of Life Research}, volume = {26}, number = {11}, year = {2017}, pages = {2909{\textendash}2918}, issn = {1573-2649}, doi = {10.1007/s11136-017-1624-3}, url = {https://doi.org/10.1007/s11136-017-1624-3}, author = {Paap, Muirne C. S. and Kroeze, Karel A. and Terwee, Caroline B. and van der Palen, Job and Veldkamp, Bernard P.} } @conference {2643, title = {A Large-Scale Progress Monitoring Application with Computerized Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Many conventional assessment tools are available to teachers in schools for monitoring student progress in a formative manner. The outcomes of these assessment tools are essential to teachers\’ instructional modifications and schools\’ data-driven educational strategies, such as using remedial activities and planning instructional interventions for students with learning difficulties. When measuring student progress toward instructional goals or outcomes, assessments should be not only considerably precise but also sensitive to individual change in learning. Unlike conventional paper-pencil assessments that are usually not appropriate for every student, computerized adaptive tests (CATs) are highly capable of estimating growth consistently with minimum and consistent error. Therefore, CATs can be used as a progress monitoring tool in measuring student growth.

This study focuses on an operational CAT assessment that has been used for measuring student growth in reading during the academic school year. The sample of this study consists of nearly 7 million students from the 1st grade to the 12th grade in the US. The students received a CAT-based reading assessment periodically during the school year. The purpose of these periodical assessments is to measure the growth in students\’ reading achievement and identify the students who may need additional instructional support (e.g., academic interventions). Using real data, this study aims to address the following research questions: (1) How many CAT administrations are necessary to make psychometrically sound decisions about the need for instructional changes in the classroom or when to provide academic interventions?; (2) What is the ideal amount of time between CAT administrations to capture student growth for the purpose of producing meaningful decisions from assessment results?

To address these research questions, we first used the Theil-Sen estimator to robustly fit a regression line to each student\’s test scores obtained from a series of CAT administrations. Next, we used the conditional standard error of measurement (cSEM) from the CAT administrations to create an error band around the Theil-Sen slope (i.e., the student growth rate). This process resulted in normative slope values across all the grade levels. The optimal number of CAT administrations was established from the grade-level regression results. The amount of time needed for progress monitoring was determined by calculating the amount of time required for a student to show growth beyond the median cSEM value for each grade level. The results showed that the normative slope values were highest for the lower grades and declined steadily as grade level increased. The results also suggested that the CAT-based reading assessment is most useful for grades 1 through 4, since most struggling readers requiring an intervention appear to be within this grade range. Because CAT yielded very similar cSEM values across administrations, the amount of error in the progress monitoring decisions did not seem to depend on the number of CAT administrations.
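
A minimal sketch of the growth-rate computation described above: the Theil-Sen slope is the median of all pairwise slopes for one student's score series, and growth is flagged as meaningful only when it exceeds a cSEM-based band. The score values, time points, and cSEM figure are illustrative assumptions.

import numpy as np
from itertools import combinations

def theil_sen_slope(times, scores):
    # Median of all pairwise slopes; a robust growth-rate estimate for one student
    slopes = [(scores[j] - scores[i]) / (times[j] - times[i])
              for i, j in combinations(range(len(times)), 2)
              if times[j] != times[i]]
    return float(np.median(slopes))

# Hypothetical series of CAT scale scores across periodic administrations
weeks  = np.array([0, 6, 12, 18, 24])
scores = np.array([480, 492, 498, 505, 520])
slope = theil_sen_slope(weeks, scores)          # points gained per week

# Flag growth only when it exceeds the median cSEM band around the trend
median_csem = 10.0                              # illustrative value
meaningful = slope * (weeks[-1] - weeks[0]) > median_csem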

Session Video

}, keywords = {CAT, Large-Scale tests, Process monitoring}, url = {https://drive.google.com/open?id=1uGbCKenRLnqTxImX1fZicR2c7GRV6Udc}, author = {Okan Bulut and Damien Cormier} } @article {2529, title = {Latent-Class-Based Item Selection for Computerized Adaptive Progress Tests}, journal = {Journal of Computerized Adaptive Testing}, volume = {5}, year = {2017}, pages = {22-43}, keywords = {computerized adaptive progress test, item selection method, Kullback-Leibler information, Latent class analysis, log-odds scoring}, issn = {2165-6592}, doi = {10.7333/1704-0502022}, url = {http://iacat.org/jcat/index.php/jcat/article/view/62/29}, author = {van Buuren, Nikky and Eggen, Theo J. H. M.} } @conference {2660, title = {MHK-MST Design and the Related Simulation Study}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The MHK is a national standardized exam that tests and rates Chinese language proficiency. It assesses non-native Chinese minorities\’ abilities to use the Chinese language in their daily, academic, and professional lives. Computerized multistage adaptive testing (MST) combines features of conventional paper-and-pencil (P\&P) testing and item-level computerized adaptive testing (CAT); it is a computer-based test form that uses the item set (module) as the scoring unit. MST estimates extreme ability values more accurately than conventional P\&P testing, and it uses CAT\’s adaptive features to reduce test length and score-reporting time. At present, MST is used in some large testing programs, such as the Uniform CPA Examination and the Graduate Record Examination (GRE). It is therefore worthwhile to develop MST applications in China.

Based on consideration of the MHK\’s characteristics and its future development, the researchers started with the design of the MHK-MST. This simulation study was conducted to validate the performance of the MHK-MST system. Real difficulty parameters of MHK items and simulated ability parameters of the candidates are used to generate the original score matrix, and the item modules are delivered to the candidates following the adaptive procedures set according to the path rules. This simulation study provides a sound basis for the implementation of the MHK-MST.

Session Video

}, keywords = {language testing, MHK, multistage testing}, author = {Ling Yuyu and Zhou Chenglin and Ren Jie} } @conference {2665, title = {Multi-stage Testing for a Multi-disciplined End-of primary-school Test }, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The Dutch secondary education system consists of five levels: basic, lower, and middle vocational education; general secondary education; and pre-academic education. The decision about an individual student\’s level of secondary education is based on a combination of the teacher\’s judgment and an end-of-primary-school placement test.

This placement test encompasses the measurement of reading, language, mathematics, and writing, with each skill consisting of one to four subdomains. The Dutch end-of-primary-school test is currently administered in two linear 200-item paper-based versions. The two versions differ in difficulty so as to motivate both less able and more able students, and to measure both groups of students precisely. The primary goal of the test is to provide a placement advice for the five levels of secondary education. The secondary goal is the assessment of six different fundamental reference levels defined on reading, language, and mathematics. Because of the high-stakes nature of the test\’s advice, the Dutch parliament has mandated a change of format to a multistage test. A major advantage of multistage testing is that the tailoring of the tests is related more strongly to the ability of the students than to the teacher\’s judgment. A separate multistage test is under development for each of the three skills measured by the reference levels, to increase the classification accuracy for secondary education placement and to optimally measure performance on the reference-level-related skills.

This symposium consists of three presentations discussing the challenges of transitioning from a linear paper-based test to a computer-based multistage test within an existing curriculum, and the specification of the multistage test to meet the measurement purposes. The transition to a multistage test has to improve both classification accuracy and measurement precision.

First, we describe the Dutch educational system and the role of the end-of-primary-school placement test within this system. Special attention will be paid to the advantages of multistage testing over both linear testing and computerized adaptive testing, and on practical implications related to the transitioning from a linear to a multistage test.

Second, we discuss routing and reporting on the new multi-stage test. Both topics have a major impact on the quality of the placement advice and the reference mastery decisions. Several methods for routing and reporting are compared.

Third, the linear test contains 200 items to cover a broad range of different skills and to obtain a precise measurement of those skills separately. Multistage testing creates opportunities to reduce the cognitive burden for the students while maintaining the same quality of placement advice and assessment of mastery of the reference levels. This presentation focuses on the optimal allocation of items to test modules, the optimal number of stages and modules per stage, and test length reduction.

Session Video 1

Session Video 2

}, keywords = {mst, Multidisciplined, proficiency}, url = {https://drive.google.com/open?id=1C5ys178p_Wl9eemQuIsI56IxDTck2z8P}, author = {Hendrik Straat and Maaike van Groen and Wobbe Zijlstra and Marie-Anne Keizer-Mittelha{\"e}user and Michel Lamor{\'e}} } @conference {2648, title = {New Challenges (With Solutions) and Innovative Applications of CAT}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Over the past several decades, computerized adaptive testing (CAT) has profoundly changed the administration of large-scale aptitude tests, state-wide achievement tests, professional licensure exams, and health outcome measures. While many challenges of CAT have been successfully addressed due to the continual efforts of researchers in the field, there are still many remaining, longstanding challenges that have yet to be resolved. This symposium will begin with three presentations, each of which provides a sound solution to one of the unresolved challenges. They are (1) item calibration when responses are \“missing not at random\” from CAT administration; (2) online calibration of new items when person traits have non-ignorable measurement error; and (3) establishing consistency and asymptotic normality of latent trait estimation when allowing item response revision in CAT. In addition, this symposium also features innovative applications of CAT. In particular, there is emerging interest in using cognitive diagnostic CAT to monitor and detect learning progress (4th presentation). Last but not least, the 5th presentation illustrates the power of multidimensional polytomous CAT that permits rapid identification of hospitalized patients\’ rehabilitative care needs in health outcomes measurement. We believe this symposium covers a wide range of interesting and important topics in CAT.

Session Video

}, keywords = {CAT, challenges, innovative applications}, url = {https://drive.google.com/open?id=1Wvgxw7in_QCq_F7kzID6zCZuVXWcFDPa}, author = {Chun Wang and David J. Weiss and Xue Zhang and Jian Tao and Yinhong He and Ping Chen and Shiyu Wang and Susu Zhang and Haiyan Lin and Xiaohong Gao and Hua-Hua Chang and Zhuoran Shang} } @conference {2638, title = {A New Cognitive Diagnostic Computerized Adaptive Testing for Simultaneously Diagnosing Skills and Misconceptions}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

In educational diagnosis, diagnosing misconceptions is as important as diagnosing skills. However, traditional cognitive diagnostic computerized adaptive testing (CD-CAT) is usually developed to diagnose skills only. This study proposes a new CD-CAT that can simultaneously diagnose skills and misconceptions. The proposed CD-CAT is based on a recently published CDM, the simultaneously identifying skills and misconceptions (SISM) model (Kuo, Chen, \& de la Torre, in press). A new item selection algorithm is also proposed for the CD-CAT to achieve high adaptive testing performance. In simulation studies, we compare our new item selection algorithm with three existing item selection methods: the Kullback-Leibler (KL) and posterior-weighted KL (PWKL) indices proposed by Cheng (2009) and the modified PWKL (MPWKL) proposed by Kaplan, de la Torre, and Barrada (2015). The results show that our proposed CD-CAT can efficiently diagnose skills and misconceptions; the accuracy of our new item selection algorithm is close to that of the MPWKL but with less computational burden; and our new item selection algorithm outperforms the KL and PWKL methods in diagnosing skills and misconceptions.

References

Cheng, Y. (2009). When cognitive diagnosis meets computerized adaptive testing: CD-CAT. Psychometrika, 74(4), 619{\textendash}632. doi: 10.1007/s11336-009-9123-2

Kaplan, M., de la Torre, J., \& Barrada, J. R. (2015). New item selection methods for cognitive diagnosis computerized adaptive testing. Applied Psychological Measurement, 39(3), 167{\textendash}188. doi:10.1177/0146621614554650

Kuo, B.-C., Chen, C.-H., \& de la Torre, J. (in press). A cognitive diagnosis model for identifying coexisting skills and misconceptions. Applied Psychological Measurement.

Session Video

}, keywords = {CD-CAT, Misconceptions, Simultaneous diagnosis}, author = {Bor-Chen Kuo and Chun-Hua Chen} } @conference {2636, title = {New Results on Bias in Estimates due to Discontinue Rules in Intelligence Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

The presentation provides new results on a form of adaptive testing that is used frequently in intelligence testing. In these tests, items are presented in order of increasing difficulty, and the presentation of items is adaptive in the sense that each subtest session is discontinued once a test taker produces a certain number of incorrect responses in sequence. The subsequent (not observed) responses are commonly scored as wrong for that subtest, even though the test taker has never seen these items. Discontinuation rules allow a certain form of adaptiveness in both paper-based and computer-based testing and help reduce testing time.

Two relevant lines of research are studies that directly assess the impact of discontinuation rules and studies that more broadly examine the impact of scoring rules on test results when many items are not administered or not reached. He \& Wolfe (2012) compared different ability estimation methods for this type of discontinuation-rule adaptation of test length in a simulation study. However, to our knowledge there has been no rigorous analytical study of the underlying distributional changes of the response variables under discontinuation rules. It is important to point out that the results obtained by He \& Wolfe (2012) agree with results presented by, for example, DeAyala, Plake, \& Impara (2001) as well as Rose, von Davier, \& Xu (2010) and Rose, von Davier, \& Nagengast (2016), in that ability estimates are most biased when the not-observed responses are scored as wrong. Discontinuation rules combined with scoring non-administered items as wrong are used operationally in several major intelligence tests, so more research is needed to improve this particular type of adaptiveness in testing practice.

The presentation extends existing research on adaptiveness through discontinue rules in intelligence tests in two ways: First, a rigorous analytical study of the distributional properties of discontinue-rule scored items is presented. Second, an extended simulation is presented that includes additional alternative scoring rules as well as bias-corrected ability estimators that may improve results for discontinue-rule scored intelligence tests.
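
To make the scoring rule under discussion concrete, the following Python sketch applies a discontinue rule to a response vector ordered by item difficulty and scores every item after the stopping point as incorrect; the run length of three consecutive errors is an arbitrary illustrative choice, not a value taken from any operational test.

    import numpy as np

    def apply_discontinue_rule(responses, run_length=3):
        """Stop the subtest after `run_length` consecutive incorrect responses and
        score all subsequent (never administered) items as 0."""
        responses = np.asarray(responses)
        scored = np.zeros_like(responses)
        consecutive_wrong = 0
        for i, r in enumerate(responses):
            scored[i] = r
            consecutive_wrong = consecutive_wrong + 1 if r == 0 else 0
            if consecutive_wrong == run_length:
                break                 # remaining entries stay 0: "not seen, scored wrong"
        return scored

    # the examinee would have answered two later items correctly, but the rule
    # stops administration after three misses in a row
    print(apply_discontinue_rule([1, 1, 0, 0, 0, 1, 1, 0, 1]))   # -> [1 1 0 0 0 0 0 0 0]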

References: DeAyala, R. J., Plake, B. S., \& Impara, J. C. (2001). The impact of omitted responses on the accuracy of ability estimation in item response theory. Journal of Educational Measurement, 38, 213-234.

He, W., \& Wolfe, E. W. (2012). Treatment of not-administered items on individually administered intelligence tests. Educational and Psychological Measurement, 72(5), 808{\textendash}826. doi:10.1177/0013164412441937

Rose, N., von Davier, M., \& Xu, X. (2010). Modeling non-ignorable missing data with item response theory (IRT; ETS RR-10-11). Princeton, NJ: Educational Testing Service.

Rose, N., von Davier, M., \& Nagengast, B. (2016). Modeling omitted and not-reached items in IRT models. Psychometrika. doi:10.1007/s11336-016-9544-7

Session Video

}, keywords = {Bias, CAT, Intelligence Testing}, author = {Matthias von Davier and Youngmi Cho and Tianshu Pan} } @article {2607, title = {Projection-Based Stopping Rules for Computerized Adaptive Testing in Licensure Testing}, journal = {Applied Psychological Measurement}, volume = {42}, year = {2017}, month = {2018/06/01}, pages = {275-290}, abstract = {The confidence interval (CI) stopping rule is commonly used in licensure settings to make classification decisions with fewer items in computerized adaptive testing (CAT). However, it tends to be less efficient in the near-cut regions of the θ scale, as the CI often fails to be narrow enough for an early termination decision prior to reaching the maximum test length. To solve this problem, this study proposed the projection-based stopping rules that base the termination decisions on the algorithmically projected range of the final θ estimate at the hypothetical completion of the CAT. A simulation study and an empirical study were conducted to show the advantages of the projection-based rules over the CI rule, in which the projection-based rules reduced the test length without jeopardizing critical psychometric qualities of the test, such as the θ and classification precision. Operationally, these rules do not require additional regularization parameters, because the projection is simply a hypothetical extension of the current test within the existing CAT environment. Because these new rules are specifically designed to address the decreased efficiency in the near-cut regions as opposed to the entire scale, the authors recommend using them in conjunction with the CI rule in practice.}, isbn = {0146-6216}, url = {https://doi.org/10.1177/0146621617726790}, author = {Luo, Xiao and Kim, Doyoung and Dickison, Philip} } @conference {2670, title = {Response Time and Response Accuracy in Computerized Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Introduction. This study explores the relationship between response speed and response accuracy in Computerized Adaptive Testing (CAT). CAT provides a score as well as item response times, which can offer additional diagnostic information regarding behavioral processes of task completion that cannot be uncovered by paper-based instruments. The goal of this study is to investigate how the accuracy rate evolves as a function of response time. If the accuracy of cognitive test responses decreases with response time, then it is an indication that the underlying cognitive process is a degrading process such as knowledge retrieval. More accessible knowledge can be retrieved faster than less accessible knowledge. For instance, in reading tasks, the time-on-task effect is negative, and the more negative it is, the easier the task. However, if the accuracy of cognitive test responses increases with response time, then the process is of an upgrading nature, with an increasing success rate as a function of response time. For example, problem-solving takes time, and fast responses are less likely to be well-founded responses. It is of course also possible that the relationship is curvilinear, as when an increasing success rate is followed by a decreasing success rate or vice versa.

Hypothesis. The present study argues that the relationship between time on task and response accuracy can be positive, negative, or curvilinear, depending on the cognitive nature of the items, holding the ability of the subjects and the difficulty of the items constant.

Methodology. Data from a subsection of the GRE quantitative test were available. We will use generalized linear mixed models, in which a linear combination of predictors determines the probability of person p answering item i correctly. Modeling mixed effects means that both random effects and fixed effects are included; fixed effects are constant across test takers. The models are equivalent to advanced IRT models that go beyond the regular modeling of test responses in terms of one or more latent variables and item parameters. The lme4 package for R will be used for the statistical computations.
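
The authors plan to fit these models with the lme4 package in R; as a rough analogue, the Python sketch below fits a fixed-effects logistic regression (item dummies instead of random effects) to synthetic data in which a negative time-on-task effect has been built in. All variable names and parameter values are illustrative assumptions, not the GRE data or the authors' specification.

    import numpy as np
    import pandas as pd
    import statsmodels.formula.api as smf

    rng = np.random.default_rng(0)
    rows = []
    for p in range(200):                      # 200 synthetic test takers
        ability = rng.normal()
        for i in range(20):                   # 20 synthetic items
            log_rt = rng.normal(4.0, 0.5)     # log response time
            eta = ability - 0.6 * (log_rt - 4.0) - 0.1 * i
            rows.append({"person": p, "item": i, "log_rt": log_rt,
                         "correct": rng.binomial(1, 1 / (1 + np.exp(-eta)))})
    df = pd.DataFrame(rows)

    # item dummies absorb difficulty; log_rt carries the time-on-task effect
    # (lme4 would model person and item effects as random rather than fixed)
    fit = smf.logit("correct ~ log_rt + C(item)", data=df).fit(disp=False)
    print(fit.params["log_rt"])               # negative, by construction of the synthetic data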

Research questions. 1. What is the relationship between response accuracy and response speed? 2. What is the correlation between response accuracy and type of response time (fast vs. slow responses) after controlling for the ability of the test takers?

Preliminary Findings. 1. There is a negative relationship between response time and response accuracy: the success rate declines with elapsing response time. 2. The correlation between the two response-time latent variables (fast and slow) is 1.0, indicating that the time-on-task effect does not differ between response time types.

Implications. The right amount of testing time in CAT is important: too much is wasteful and costly, too little impacts score validity. The study is expected to provide new insight into the relationship between response time and response accuracy, which in turn can contribute to better timing strategies in CAT, with or without time constraints.

Session Video

}, keywords = {CAT, response accuracy, Response time}, url = {https://drive.google.com/open?id=1yYP01bzGrKvJnfLwepcAoQQ2F4TdSvZ2}, author = {Yang Shi} } @article {2613, title = {Robust Automated Test Assembly for Testlet-Based Tests: An Illustration with Analytical Reasoning Items}, journal = {Frontiers in Education}, volume = {2}, year = {2017}, pages = {63}, issn = {2504-284X}, doi = {10.3389/feduc.2017.00063}, url = {https://www.frontiersin.org/article/10.3389/feduc.2017.00063}, author = {Veldkamp, Bernard P. and Paap, Muirne C. S.} } @conference {2630, title = {Scripted On-the-fly Multistage Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

On-the-fly multistage testing (OMST) was introduced recently as a promising alternative to preassembled MST. A decidedly appealing feature of both is the reviewability of items within the current stage. However, the fundamental difference is that, instead of routing to a preassembled module, OMST adaptively assembles a module at each stage according to an interim ability estimate. This produces more individualized forms with finer measurement precision, but imposing nonstatistical constraints and controlling item exposure become more cumbersome. One recommendation is to use the maximum priority index followed by a remediation step to satisfy content constraints, and the Sympson-Hetter method with a stratified item bank for exposure control.

However, these methods can be computationally expensive, thereby impeding practical implementation. Therefore, this study investigated the script method as a simpler solution to the challenge of strict content balancing and effective item exposure control in OMST. The script method was originally devised as an item selection algorithm for CAT and generally proceeds as follows: For a test with m items, there are m slots to be filled, and an item is selected according to pre-defined rules for each slot. For the first slot, randomly select an item from a designated content area (collection). For each subsequent slot, 1) Discard any enemies of items already administered in previous slots; 2) Draw a designated number of candidate items (selection length) from the designated collection according to the current ability estimate; 3) Randomly select one item from the set of candidates. There are two distinct features of the script method. First, a predetermined sequence of collections guarantees meeting content specifications. The specific ordering may be determined either randomly or deliberately by content experts. Second, steps 2 and 3 depict a method of exposure control, in which selection length balances item usage at the possible expense of ability estimation accuracy. The adaptation of the script method to OMST is straightforward. For the first module, randomly select each item from a designated collection. For each subsequent module, the process is the same as in scripted CAT (SCAT) except the same ability estimate is used for the selection of all items within the module. A series of simulations was conducted to evaluate the performance of scripted OMST (SOMST, with 3 or 4 evenly divided stages) relative to SCAT under various item exposure restrictions. In all conditions, reliability was maximized by programming an optimization algorithm that searches for the smallest possible selection length for each slot within the constraints. Preliminary results indicated that SOMST is certainly a capable design with performance comparable to that of SCAT. The encouraging findings and ease of implementation highly motivate the prospect of operational use for large-scale assessments.
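
A minimal Python sketch of the slot-by-slot logic described above is given below; the item bank, collection assignments, enemy lists, and the use of distance from the interim theta as a stand-in for item information are all illustrative assumptions rather than the authors' implementation. For SOMST, the same interim estimate would simply be reused for every slot within a module.

    import numpy as np

    def scripted_selection(script, bank, theta, rng):
        """Fill each slot following a pre-defined script of (collection, selection_length)
        pairs; `bank` maps item id -> dict with collection, difficulty b, and enemy set."""
        administered = []
        for collection, k in script:
            # 1) discard enemies of items already administered
            blocked = set().union(*(bank[j]["enemies"] for j in administered)) if administered else set()
            eligible = [j for j in bank if bank[j]["collection"] == collection
                        and j not in blocked and j not in administered]
            # 2) draw the k candidates closest to the interim theta (proxy for "most informative")
            eligible.sort(key=lambda j: abs(bank[j]["b"] - theta))
            candidates = eligible[:k]
            # 3) pick one candidate at random -- the exposure-control step
            administered.append(candidates[rng.integers(len(candidates))])
        return administered

    rng = np.random.default_rng(1)
    bank = {j: {"collection": j % 3, "b": rng.normal(), "enemies": set()} for j in range(60)}
    script = [(0, 5), (1, 5), (2, 5), (0, 3)]     # collection order fixed in advance
    print(scripted_selection(script, bank, theta=0.2, rng=rng))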

Presentation Video

}, keywords = {CAT, multistage testing, On-the-fly testing}, url = {https://drive.google.com/open?id=1wKuAstITLXo6BM4APf2mPsth1BymNl-y}, author = {Edison Choe and Bruce Williams and Sung-Hyuck Lee} } @conference {2639, title = {A Simulation Study to Compare Classification Method in Cognitive Diagnosis Computerized Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Cognitive Diagnostic Computerized Adaptive Testing (CD-CAT) combines the strengths of CAT and cognitive diagnosis. Cognitive diagnosis models, which can be viewed as restricted latent class models, have been developed to classify examinees into profiles of mastered and non-mastered skills so that remediation can be targeted more efficiently. Chiu \& Douglas (2013) introduced a nonparametric procedure that requires only the specification of a Q-matrix and classifies examinees by proximity to ideal response patterns. In this article, we compare the nonparametric procedure with a common profile estimation method, maximum a posteriori (MAP), in CD-CAT. Simulation studies consider a variety of Q-matrix structures, numbers of attributes, ways of generating attribute profiles, and levels of item quality. Results indicate that the nonparametric procedure consistently achieves higher pattern and attribute recovery rates in nearly all conditions.
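
The core of the nonparametric procedure of Chiu \& Douglas (2013) is classification by proximity to ideal response patterns; the Python sketch below illustrates the idea with conjunctive (DINA-type) ideal responses and plain Hamming distance, which is a simplification of the full procedure and uses a made-up Q-matrix.

    import numpy as np
    from itertools import product

    def ideal_responses(q_matrix, patterns):
        """Conjunctive ideal response: 1 iff the profile masters every required attribute."""
        return np.array([[int(np.all(alpha[q == 1] == 1)) for q in q_matrix]
                         for alpha in patterns])

    def classify(observed, q_matrix):
        """Return the attribute pattern whose ideal responses are closest in Hamming distance."""
        patterns = np.array(list(product([0, 1], repeat=q_matrix.shape[1])))
        ideals = ideal_responses(q_matrix, patterns)
        distances = np.abs(ideals - np.asarray(observed)).sum(axis=1)
        return patterns[int(np.argmin(distances))]

    q = np.array([[1, 0], [0, 1], [1, 1], [1, 0]])   # 4 items, 2 attributes
    print(classify([1, 0, 0, 1], q))                 # -> [1 0]: mastery of attribute 1 only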

References

Chiu, C.-Y., \& Douglas, J. (2013). A nonparametric approach to cognitive diagnosis by proximity to ideal response patterns. Journal of Classification, 30, 225-250. doi: 10.1007/s00357-013-9132-9

Session Video

}, url = {https://drive.google.com/open?id=1jCL3fPZLgzIdwvEk20D-FliZ15OTUtpr}, author = {Jing Yang and Jian Tao and Hua-Hua Chang and Ning-Zhong Shi} } @conference {2628, title = {Using Automated Item Generation in a Large-scale Medical Licensure Exam Program: Lessons Learned.}, booktitle = {2017 IACAT Conference}, year = {2017}, month = {08.2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

On-demand testing has become commonplace with most large-scale testing programs. Continuous testing is appealing for candidates in that it affords greater flexibility in scheduling a session at the desired location. Furthermore, the push for more comprehensive systems of assessment (e.g., CBAL) is predicated on the availability of more frequently administered tasks given the purposeful link between instruction and assessment in these frameworks. However, continuous testing models impose several challenges for programs, including overexposure of items. Robust item banks are therefore needed to support routine retirement and replenishment of items. In a traditional approach to developing items, content experts select a topic and then develop an item consisting of a stem, a lead-in question, a correct answer, and a list of distractors. The item then undergoes review by a panel of experts to validate the content and identify any potential flaws. The process involved in developing quality MCQ items can be time-consuming as well as costly, with estimates as high as $1500-$2500 USD per item (Rudner, 2010). The Medical Council of Canada (MCC) has been exploring a novel item development process to supplement traditional approaches. Specifically, the use of automated item generation (AIG), which uses technology to generate test items from cognitive models, has been studied for over five years. Cognitive models are representations of the knowledge and skills that are required to solve any given problem. While developing a cognitive model for a medical scenario, for example, content experts are asked to deconstruct the (clinical) reasoning process involved via clearly stated variables and related elements. The latter information is then entered into a computer program that uses algorithms to generate MCQs. The MCC has been piloting AIG-based items for over five years with the MCC Qualifying Examination Part I (MCCQE I), a pre-requisite for licensure in Canada. The aim of this presentation is to provide an overview of the practical lessons learned in the use and operational rollout of AIG with the MCCQE I. Psychometrically, the quality of the items is at least equal, and in many instances superior, to that of traditionally written MCQs, based on difficulty, discrimination, and information. In fact, 96\% of the AIG-based items piloted in a recent administration were retained for future operational scoring based on pre-defined inclusion criteria. AIG also offers a framework for the systematic creation of plausible distractors, in that the content experts not only need to provide the clinical reasoning underlying a correct response but also the cognitive errors associated with each of the distractors (Lai et al., 2016). Consequently, AIG holds great promise for improving and tailoring diagnostic feedback for remedial purposes (Pugh, De Champlain, Gierl, Lai, \& Touchie, 2016). Furthermore, our test development process has been greatly enhanced by the addition of AIG, as it requires that item writers use metacognitive skills to describe how they solve problems. We are hopeful that sharing our experiences with attendees might not only help other testing organizations interested in adopting AIG, but also foster discussion which might benefit all participants.

References

Lai, H., Gierl, M.J., Touchie, C., Pugh, D., Boulais, A.P., \& De Champlain, A.F. (2016). Using automatic item generation to improve the quality of MCQ distractors. Teaching and Learning in Medicine, 28, 166-173.

Pugh, D., De Champlain, A.F., Lai, H., Gierl, M., \& Touchie, C. (2016). Using cognitive models to develop quality multiple choice questions. Medical Teacher, 38, 838-843.

Rudner, L. (2010). Implementing the Graduate Management Admission Test Computerized Adaptive Test. In W. van der Linden \& C. Glass (Eds.), Elements of adaptive testing (pp. 151-165). New York, NY: Springer.

Presentation Video

}, keywords = {Automated item generation, large scale, medical licensure}, url = {https://drive.google.com/open?id=14N8hUc8qexAy5W_94TykEDABGVIJHG1h}, author = {Andr{\'e} F. De Champlain} } @conference {2633, title = {Using Bayesian Decision Theory in Cognitive Diagnosis Computerized Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata Japan}, abstract = {

Cognitive diagnosis computerized adaptive testing (CD-CAT) purports to provide each individual with a profile of the strengths and weaknesses of attributes or skills through computerized adaptive testing. In the CD-CAT literature, researchers have devoted much effort to developing item selection algorithms that improve measurement efficiency, and most algorithms have been developed based on information theory. Given the discontinuous nature of the latent variables in CD-CAT, this study introduces an alternative item selection method, the minimum expected cost (MEC) method, derived from Bayesian decision theory. Using simulations, the MEC method was evaluated against the posterior-weighted Kullback-Leibler (PWKL) information, the modified PWKL (MPWKL), and the mutual information (MI) methods by manipulating item bank quality, item selection algorithm, and termination rule. Results indicated that, regardless of item quality and termination criterion, the MEC, MPWKL, and MI methods performed very similarly, and all outperformed the PWKL method in classification accuracy and test efficiency, especially in short tests; the MEC method also used the item bank more efficiently than the MPWKL and MI methods. Moreover, the MEC method can take the costs of incorrect decisions into account and improve classification accuracy and test efficiency when a particular profile is of concern. All the results suggest the practicability of the MEC method in CD-CAT.
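
The decision-theoretic idea behind a minimum-expected-cost rule can be sketched as choosing the item whose administration minimizes the pre-posterior expected cost of the subsequent classification decision. The class structure, response probabilities, and 0/1 loss matrix in the Python sketch below are illustrative assumptions, not the authors' exact MEC formulation.

    import numpy as np

    def expected_cost(post, p_correct, cost):
        """Pre-posterior expected cost of one item: average, over the two possible
        responses, of the minimum posterior expected cost of the decision."""
        total = 0.0
        for x in (0, 1):
            like = p_correct if x == 1 else 1 - p_correct       # P(X = x | class)
            marginal = float(np.dot(like, post))                # P(X = x)
            new_post = like * post / marginal                   # Bayes update
            bayes_cost = min(float(np.dot(cost[d], new_post)) for d in range(len(post)))
            total += marginal * bayes_cost
        return total

    def select_item(post, p_correct_by_item, cost, administered):
        costs = {j: expected_cost(post, p, cost)
                 for j, p in enumerate(p_correct_by_item) if j not in administered}
        return min(costs, key=costs.get)

    post = np.array([0.5, 0.3, 0.2])                            # posterior over 3 latent classes
    p_correct_by_item = np.array([[0.9, 0.5, 0.2], [0.8, 0.6, 0.3],
                                  [0.7, 0.2, 0.2], [0.6, 0.5, 0.4]])
    cost = 1 - np.eye(3)                                        # 0/1 loss for misclassification
    print(select_item(post, p_correct_by_item, cost, administered=set()))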

Session Video

}, keywords = {Bayesian Decision Theory, CD-CAT}, author = {Chia-Ling Hsu and Wen-Chung Wang and ShuYing Chen} } @conference {2661, title = {Using Computerized Adaptive Testing to Detect Students{\textquoteright} Misconceptions: Exploration of Item Selection}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Holding misconceptions impedes learning; thus, detecting misconceptions through assessment is crucial for effective teaching. However, most computerized adaptive testing (CAT) applications for diagnosing examinees{\textquoteright} attribute profiles focus only on whether examinees have mastered the correct concepts. In educational settings, teachers and students must figure out the misconceptions underlying incorrect answers after obtaining assessment scores and then correct those misconceptions. The Scaling Individuals and Classifying Misconceptions (SICM) models proposed by Bradshaw and Templin (2014) fill this gap. SICM models can identify a student{\textquoteright}s misconceptions directly from the distractors of multiple-choice questions and report whether the student holds them or not. At the same time, SICM models can estimate a continuous ability within the item response theory (IRT) framework to meet the needs of policy-driven assessment systems that rely on scaling examinees{\textquoteright} ability. However, the advantage of estimating two types of latent variables also increases the complexity of model estimation: more items are required to achieve the same classification and estimation accuracies as dichotomous DCMs and IRT, respectively. Thus, we aim to develop a CAT based on the SICM models (SICM-CAT) that estimates students{\textquoteright} misconceptions and continuous abilities simultaneously with fewer items than a linear test.

To achieve this goal, our research questions focus on establishing item selection rules that provide both accurate classification results and accurate continuous ability estimates with the SICM-CAT. The first research question concerns which information criterion to use. The Kullback{\textendash}Leibler (KL) divergence is a natural first choice, as it can combine the continuous and discrete latent variables. Based on this criterion, we propose an item selection index that integrates the two types of information, so that the items selected in real time discriminate the examinee{\textquoteright}s current misconception profile and ability estimates from other possible estimates to the greatest extent. The second research question concerns how to adaptively balance estimation of the misconception profile and the continuous latent ability. Mimicking the idea of the Hybrid Design proposed by Wang et al. (2016), we propose a design framework in which item selection transitions from the group level to the item level. We aim to explore several design questions, such as how to select the transition point and which latent variable should be targeted first.

Preliminary results indicated that the SICM-CAT based on the proposed item selection index could classify examinees into different latent classes and measure their latent abilities more accurately and reliably than the random selection method under all simulation conditions. As a next step, we plan to compare different CAT designs based on our proposed item selection rules with the best linear test. We expect that the SICM-CAT can use a shorter test length while retaining the same accuracy and reliability.

References

Bradshaw, L., \& Templin, J. (2014). Combining item response theory and diagnostic classification models: A psychometric model for scaling ability and diagnosing misconceptions. Psychometrika, 79(3), 403-425.

Wang, S., Lin, H., Chang, H. H., \& Douglas, J. (2016). Hybrid computerized adaptive testing: from group sequential design to fully sequential design. Journal of Educational Measurement, 53(1), 45-62.

Session Video

}, keywords = {CAT, incorrect answering, Student Misconception}, author = {Yawei Shen and Yu Bao and Shiyu Wang and Laine Bradshaw} } @conference {2659, title = {Using Determinantal Point Processes for Multistage Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Multistage tests are a generalization of computerized adaptive tests (CATs) that allow batches of questions to be asked before the adaptation process starts, instead of asking questions one by one. To be usable in real-world scenarios, they should be assembled on the fly, and recent models have been designed accordingly (Zheng \& Chang, 2015). We present a new algorithm for assembling multistage tests, based on a recent technique in machine learning called determinantal point processes. We illustrate this technique on various student data sets drawn from fraction subtraction items and massive open online courses.

In multidimensional CATs, feature vectors are estimated for students and questions, and the probability that a student answers a question correctly depends on how strongly the student{\textquoteright}s feature vector is correlated with the question{\textquoteright}s feature vector. In other words, questions that are close in the feature space lead to similar response patterns from the students. Therefore, to maximize the information of a batch of questions, the volume spanned by their feature vectors should be as large as possible. Determinantal point processes allow efficient sampling of batches of items from a bank that are diverse, that is, that span a large volume: it is possible to draw k items among n with O(nk^3) complexity, which is convenient for large banks of tens of thousands of items.
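
The volume-maximization idea can be conveyed with a simple greedy heuristic that adds, one item at a time, the item that most increases the determinant of the Gram matrix of the selected feature vectors. This Python sketch uses random feature vectors and is only an illustration; the authors use a proper determinantal point process sampler, which achieves the O(nk^3) complexity quoted above.

    import numpy as np

    def greedy_diverse_batch(features, k):
        """Greedily pick k items whose feature vectors span a large volume,
        measured by the determinant of the Gram matrix of the selected rows."""
        selected = []
        for _ in range(k):
            best_j, best_det = None, -np.inf
            for j in range(len(features)):
                if j in selected:
                    continue
                sub = features[selected + [j]]
                det = np.linalg.det(sub @ sub.T)
                if det > best_det:
                    best_j, best_det = j, det
            selected.append(best_j)
        return selected

    rng = np.random.default_rng(42)
    item_features = rng.normal(size=(100, 5))          # e.g., multidimensional discrimination vectors
    print(greedy_diverse_batch(item_features, k=4))    # batch size kept below the feature dimension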

References

Zheng, Y., \& Chang, H. H. (2015). On-the-fly assembled multistage adaptive testing. Applied Psychological Measurement, 39(2), 104-118.

Session Video

}, keywords = {Multidimentional CAT, multistage testing}, url = {https://drive.google.com/open?id=1GkJkKTEFWK3srDX8TL4ra_Xbsliemu1R}, author = {Jill-J{\^e}nn Vie} } @article {2616, title = {The validation of a computer-adaptive test (CAT) for assessing health-related quality of life in children and adolescents in a clinical sample: study design, methods and first results of the Kids-CAT study}, journal = {Quality of Life Research}, volume = {26}, number = {5}, year = {2017}, month = {May}, pages = {1105{\textendash}1117}, abstract = {Recently, we developed a computer-adaptive test (CAT) for assessing health-related quality of life (HRQoL) in children and adolescents: the Kids-CAT. It measures five generic HRQoL dimensions. The aims of this article were (1) to present the study design and (2) to investigate its psychometric properties in a clinical setting.}, issn = {1573-2649}, doi = {10.1007/s11136-016-1437-9}, url = {https://doi.org/10.1007/s11136-016-1437-9}, author = {Barthel, D. and Otto, C. and Nolte, S. and Meyrose, A.-K. and Fischer, F. and Devine, J. and Walter, O. and Mierke, A. and Fischer, K. I. and Thyen, U. and Klein, M. and Ankermann, T. and Rose, M. and Ravens-Sieberer, U.} } @article {2458, title = {Bayesian Networks in Educational Assessment: The State of the Field}, journal = {Applied Psychological Measurement}, volume = {40}, number = {1}, year = {2016}, pages = {3-21}, abstract = {Bayesian networks (BN) provide a convenient and intuitive framework for specifying complex joint probability distributions and are thus well suited for modeling content domains of educational assessments at a diagnostic level. BN have been used extensively in the artificial intelligence community as student models for intelligent tutoring systems (ITS) but have received less attention among psychometricians. This critical review outlines the existing research on BN in educational assessment, providing an introduction to the ITS literature for the psychometric community, and points out several promising research paths. The online appendix lists 40 assessment systems that serve as empirical examples of the use of BN for educational assessment in a variety of domains.}, doi = {10.1177/0146621615590401}, url = {http://apm.sagepub.com/content/40/1/3.abstract}, author = {Culbertson, Michael J.} } @article {2486, title = {A Comparison of Constrained Item Selection Methods in Multidimensional Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {40}, number = {5}, year = {2016}, pages = {346-360}, abstract = {The construction of assessments in computerized adaptive testing (CAT) usually involves fulfilling a large number of statistical and non-statistical constraints to meet test specifications. To improve measurement precision and test validity, the multidimensional priority index (MPI) and the modified MPI (MMPI) can be used to monitor many constraints simultaneously under a between-item and a within-item multidimensional framework, respectively. As both item selection methods can be implemented easily and computed efficiently, they are important and useful for operational CATs; however, no thorough simulation study has compared the performance of these two item selection methods under two different item bank structures. The purpose of this study was to investigate the efficiency of the MMPI and the MPI item selection methods under the between-item and within-item multidimensional CAT through simulations. 
The MMPI and the MPI item selection methods yielded similar performance in measurement precision for both multidimensional pools and yielded similar performance in exposure control and constraint management for the between-item multidimensional pool. For the within-item multidimensional pool, the MPI method yielded slightly better performance in exposure control but yielded slightly worse performance in constraint management than the MMPI method.}, doi = {10.1177/0146621616639305}, url = {http://apm.sagepub.com/content/40/5/346.abstract}, author = {Su, Ya-Hui} } @article {2461, title = {On Computing the Key Probability in the Stochastically Curtailed Sequential Probability Ratio Test}, journal = {Applied Psychological Measurement}, volume = {40}, number = {2}, year = {2016}, pages = {142-156}, abstract = {The Stochastically Curtailed Sequential Probability Ratio Test (SCSPRT) is a termination criterion for computerized classification tests (CCTs) that has been shown to be more efficient than the well-known Sequential Probability Ratio Test (SPRT). The performance of the SCSPRT depends on computing the probability that at a given stage in the test, an examinee{\textquoteright}s current interim classification status will not change before the end of the test. Previous work discusses two methods of computing this probability, an exact method in which all potential responses to remaining items are considered and an approximation based on the central limit theorem (CLT) requiring less computation. Generally, the CLT method should be used early in the test when the number of remaining items is large, and the exact method is more appropriate at later stages of the test when few items remain. However, there is currently a dearth of information as to the performance of the SCSPRT when using the two methods. For the first time, the exact and CLT methods of computing the crucial probability are compared in a simulation study to explore whether there is any effect on the accuracy or efficiency of the CCT. The article is focused toward practitioners and researchers interested in using the SCSPRT as a termination criterion in an operational CCT.}, doi = {10.1177/0146621615611633}, url = {http://apm.sagepub.com/content/40/2/142.abstract}, author = {Huebner, Alan R. and Finkelman, Matthew D.} } @article {2618, title = {On the effect of adding clinical samples to validation studies of patient-reported outcome item banks: a simulation study}, journal = {Quality of Life Research}, volume = {25}, number = {7}, year = {2016}, pages = {1635{\textendash}1644}, abstract = {To increase the precision of estimated item parameters of item response theory models for patient-reported outcomes, general population samples are often enriched with samples of clinical respondents. Calibration studies provide little information on how this sampling scheme is incorporated into model estimation. 
In a small simulation study the impact of ignoring the oversampling of clinical respondents on item and person parameters is illustrated.}, issn = {1573-2649}, doi = {10.1007/s11136-015-1199-9}, url = {https://doi.org/10.1007/s11136-015-1199-9}, author = {Smits, Niels} } @article {2491, title = {Effect of Imprecise Parameter Estimation on Ability Estimation in a Multistage Test in an Automatic Item Generation Context}, journal = {Journal of Computerized Adaptive Testing}, volume = {4}, year = {2016}, pages = {1-18}, keywords = {Adaptive Testing, automatic item generation, errors in item parameters, item clones, multistage testing}, issn = {2165-6592}, doi = {10.7333/1608-040101}, url = {http://iacat.org/jcat/index.php/jcat/article/view/59/27}, author = {Colvin, Kimberly and Keller, Lisa A and Robin, Frederic} } @article {2507, title = {Exploration of Item Selection in Dual-Purpose Cognitive Diagnostic Computerized Adaptive Testing: Based on the RRUM}, journal = {Applied Psychological Measurement}, volume = {40}, number = {8}, year = {2016}, pages = {625-640}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) can be divided into two broad categories: (a) single-purpose tests, which are based on the subject{\textquoteright}s knowledge state (KS) alone, and (b) dual-purpose tests, which are based on both the subject{\textquoteright}s KS and traditional ability level (θ). This article seeks to identify the most efficient item selection method for the latter type of CD-CAT corresponding to various conditions and various evaluation criteria, respectively, based on the reduced reparameterized unified model (RRUM) and the two-parameter logistic model of item response theory (IRT-2PLM). The Shannon entropy (SHE) and Fisher information methods were combined to produce a new synthetic item selection index, that is, the {\textquotedblleft}dapperness with information (DWI){\textquotedblright} index, which concurrently considers both KS and θ within one step. The new method was compared with four other methods. The results showed that, in most conditions, the new method exhibited the best performance in terms of KS estimation and the second-best performance in terms of θ estimation. Item utilization uniformity and computing time are also considered for all the competing methods.}, doi = {10.1177/0146621616666008}, url = {http://apm.sagepub.com/content/40/8/625.abstract}, author = {Dai, Buyun and Zhang, Minqiang and Li, Guangming} } @article {2506, title = {High-Efficiency Response Distribution{\textendash}Based Item Selection Algorithms for Short-Length Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {40}, number = {8}, year = {2016}, pages = {608-624}, abstract = {Cognitive diagnostic computerized adaptive testing (CD-CAT) purports to obtain useful diagnostic information with great efficiency brought by CAT technology. Most of the existing CD-CAT item selection algorithms are evaluated when test length is fixed and relatively long, but some applications of CD-CAT, such as in interim assessment, require obtaining the cognitive pattern with a short test. The mutual information (MI) algorithm proposed by Wang is the first endeavor to accommodate this need. To reduce the computational burden, Wang provided a simplified scheme, but at the price of scale/sign change in the original index. As a result, it is very difficult to combine it with some popular constraint management methods. 
The current study proposes two high-efficiency algorithms, posterior-weighted cognitive diagnostic model (CDM) discrimination index (PWCDI) and posterior-weighted attribute-level CDM discrimination index (PWACDI), by modifying the CDM discrimination index. They can be considered as an extension of the Kullback{\textendash}Leibler (KL) and posterior-weighted KL (PWKL) methods. A pre-calculation strategy has also been developed to address the computational issue. Simulation studies indicate that the newly developed methods can produce results comparable with or better than the MI and PWKL in both short and long tests. The other major advantage is that the computational issue has been addressed more elegantly than MI. PWCDI and PWACDI can run as fast as PWKL. More importantly, they do not suffer from the problem of scale/sign change as MI and, thus, can be used with constraint management methods together in a straightforward manner.}, doi = {10.1177/0146621616665196}, url = {http://apm.sagepub.com/content/40/8/608.abstract}, author = {Zheng, Chanjin and Chang, Hua-Hua} } @article {2462, title = {Hybrid Computerized Adaptive Testing: From Group Sequential Design to Fully Sequential Design}, journal = {Journal of Educational Measurement}, volume = {53}, number = {1}, year = {2016}, pages = {45{\textendash}62}, abstract = {Computerized adaptive testing (CAT) and multistage testing (MST) have become two of the most popular modes in large-scale computer-based sequential testing. ~Though most designs of CAT and MST exhibit strength and weakness in recent large-scale implementations, there is no simple answer to the question of which design is better because different modes may fit different practical situations. This article proposes a hybrid adaptive framework to combine both CAT and MST, inspired by an analysis of the history of CAT and MST. The proposed procedure is a design which transitions from a group sequential design to a fully sequential design. This allows for the robustness of MST in early stages, but also shares the advantages of CAT in later stages with fine tuning of the ability estimator once its neighborhood has been identified. Simulation results showed that hybrid designs following our proposed principles provided comparable or even better estimation accuracy and efficiency than standard CAT and MST designs, especially for examinees at the two ends of the ability range.}, issn = {1745-3984}, doi = {10.1111/jedm.12100}, url = {http://dx.doi.org/10.1111/jedm.12100}, author = {Wang, Shiyu and Lin, Haiyan and Chang, Hua-Hua and Douglas, Jeff} } @article {2488, title = {On the Issue of Item Selection in Computerized Adaptive Testing With Response Times}, journal = {Journal of Educational Measurement}, volume = {53}, number = {2}, year = {2016}, pages = {212{\textendash}228}, abstract = {Many standardized tests are now administered via computer rather than paper-and-pencil format. The computer-based delivery mode brings with it certain advantages. One advantage is the ability to adapt the difficulty level of the test to the ability level of the test taker in what has been termed computerized adaptive testing (CAT). A second advantage is the ability to record not only the test taker{\textquoteright}s response to each item (i.e., question), but also the amount of time the test taker spends considering and answering each item. 
Combining these two advantages, various methods were explored for utilizing response time data in selecting appropriate items for an individual test taker.Four strategies for incorporating response time data were evaluated, and the precision of the final test-taker score was assessed by comparing it to a benchmark value that did not take response time information into account. While differences in measurement precision and testing times were expected, results showed that the strategies did not differ much with respect to measurement precision but that there were differences with regard to the total testing time.}, issn = {1745-3984}, doi = {10.1111/jedm.12110}, url = {http://dx.doi.org/10.1111/jedm.12110}, author = {Veldkamp, Bernard P.} } @article {2481, title = {Maximum Likelihood Score Estimation Method With Fences for Short-Length Tests and Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {40}, number = {4}, year = {2016}, pages = {289-301}, abstract = {A critical shortcoming of the maximum likelihood estimation (MLE) method for test score estimation is that it does not work with certain response patterns, including ones consisting only of all 0s or all 1s. This can be problematic in the early stages of computerized adaptive testing (CAT) administration and for tests short in length. To overcome this challenge, test practitioners often set lower and upper bounds of theta estimation and truncate the score estimation to be one of those bounds when the log likelihood function fails to yield a peak due to responses consisting only of 0s or 1s. Even so, this MLE with truncation (MLET) method still cannot handle response patterns in which all harder items are correct and all easy items are incorrect. Bayesian-based estimation methods such as the modal a posteriori (MAP) method or the expected a posteriori (EAP) method can be viable alternatives to MLE. The MAP or EAP methods, however, are known to result in estimates biased toward the center of a prior distribution, resulting in a shrunken score scale. This study introduces an alternative approach to MLE, called MLE with fences (MLEF). In MLEF, several imaginary {\textquotedblleft}fence{\textquotedblright} items with fixed responses are introduced to form a workable log likelihood function even with abnormal response patterns. The findings of this study suggest that, unlike MLET, the MLEF can handle any response patterns and, unlike both MAP and EAP, results in score estimates that do not cause shrinkage of the theta scale.}, doi = {10.1177/0146621616631317}, url = {http://apm.sagepub.com/content/40/4/289.abstract}, author = {Han, Kyung T.} } @article {2463, title = {Modeling Student Test-Taking Motivation in the Context of an Adaptive Achievement Test}, journal = {Journal of Educational Measurement}, volume = {53}, number = {1}, year = {2016}, pages = {86{\textendash}105}, abstract = {This study examined the utility of response time-based analyses in understanding the behavior of unmotivated test takers. For the data from an adaptive achievement test, patterns of observed rapid-guessing behavior and item response accuracy were compared to the behavior expected under several types of models that have been proposed to represent unmotivated test taking behavior. Test taker behavior was found to be inconsistent with these models, with the exception of the effort-moderated model. 
Effort-moderated scoring was found to both yield scores that were more accurate than those found under traditional scoring, and exhibit improved person fit statistics. In addition, an effort-guided adaptive test was proposed and shown by a simulation study to alleviate item difficulty mistargeting caused by unmotivated test taking.}, issn = {1745-3984}, doi = {10.1111/jedm.12102}, url = {http://dx.doi.org/10.1111/jedm.12102}, author = {Wise, Steven L. and Kingsbury, G. Gage} } @article {2487, title = {Monitoring Items in Real Time to Enhance CAT Security}, journal = {Journal of Educational Measurement}, volume = {53}, number = {2}, year = {2016}, pages = {131{\textendash}151}, abstract = {An IRT-based sequential procedure is developed to monitor items for enhancing test security. The procedure uses a series of statistical hypothesis tests to examine whether the statistical characteristics of each item under inspection have changed significantly during CAT administration. This procedure is compared with a previously developed CTT-based procedure through simulation studies. The results show that when the total number of examinees is fixed both procedures can control the rate of type I errors at any reasonable significance level by choosing an appropriate cutoff point and meanwhile maintain a low rate of type II errors. Further, the IRT-based method has a much lower type II error rate or more power than the CTT-based method when the number of compromised items is small (e.g., 5), which can be achieved if the IRT-based procedure can be applied in an active mode in the sense that flagged items can be replaced with new items.}, issn = {1745-3984}, doi = {10.1111/jedm.12104}, url = {http://dx.doi.org/10.1111/jedm.12104}, author = {Zhang, Jinming and Li, Jie} } @article {2492, title = {Multidimensional Computerized Adaptive Testing for Classifying Examinees With Within-Dimensionality}, journal = {Applied Psychological Measurement}, volume = {40}, number = {6}, year = {2016}, pages = {387-404}, abstract = {A classification method is presented for adaptive classification testing with a multidimensional item response theory (IRT) model in which items are intended to measure multiple traits, that is, within-dimensionality. The reference composite is used with the sequential probability ratio test (SPRT) to make decisions and decide whether testing can be stopped before reaching the maximum test length. Item-selection methods are provided that maximize the determinant of the information matrix at the cutoff point or at the projected ability estimate. A simulation study illustrates the efficiency and effectiveness of the classification method. Simulations were run with the new item-selection methods, random item selection, and maximization of the determinant of the information matrix at the ability estimate. The study also showed that the SPRT with multidimensional IRT has the same characteristics as the SPRT with unidimensional IRT and results in more accurate classifications than the latter when used for multidimensional data.}, doi = {10.1177/0146621616648931}, url = {http://apm.sagepub.com/content/40/6/387.abstract}, author = {van Groen, Maaike M. and Eggen, Theo J. H. M. 
and Veldkamp, Bernard P.} } @article {2493, title = {Online Calibration of Polytomous Items Under the Generalized Partial Credit Model}, journal = {Applied Psychological Measurement}, volume = {40}, number = {6}, year = {2016}, pages = {434-450}, abstract = {Online calibration is a technology-enhanced architecture for item calibration in computerized adaptive tests (CATs). Many CATs are administered continuously over a long term and rely on large item banks. To ensure test validity, these item banks need to be frequently replenished with new items, and these new items need to be pretested before being used operationally. Online calibration dynamically embeds pretest items in operational tests and calibrates their parameters as response data are gradually obtained through the continuous test administration. This study extends existing formulas, procedures, and algorithms for dichotomous item response theory models to the generalized partial credit model, a popular model for items scored in more than two categories. A simulation study was conducted to investigate the developed algorithms and procedures under a variety of conditions, including two estimation algorithms, three pretest item selection methods, three seeding locations, two numbers of score categories, and three calibration sample sizes. Results demonstrated acceptable estimation accuracy of the two estimation algorithms in some of the simulated conditions. A variety of findings were also revealed for the interacted effects of included factors, and recommendations were made respectively.}, doi = {10.1177/0146621616650406}, url = {http://apm.sagepub.com/content/40/6/434.abstract}, author = {Zheng, Yi} } @article {2504, title = {Optimal Reassembly of Shadow Tests in CAT}, journal = {Applied Psychological Measurement}, volume = {40}, number = {7}, year = {2016}, pages = {469-485}, abstract = {Even in the age of abundant and fast computing resources, concurrency requirements for large-scale online testing programs still put an uninterrupted delivery of computer-adaptive tests at risk. In this study, to increase the concurrency for operational programs that use the shadow-test approach to adaptive testing, we explored various strategies aiming for reducing the number of reassembled shadow tests without compromising the measurement quality. Strategies requiring fixed intervals between reassemblies, a certain minimal change in the interim ability estimate since the last assembly before triggering a reassembly, and a hybrid of the two strategies yielded substantial reductions in the number of reassemblies without degradation in the measurement accuracy. The strategies effectively prevented unnecessary reassemblies due to adapting to the noise in the early test stages. They also highlighted the practicality of the shadow-test approach by minimizing the computational load involved in its use of mixed-integer programming.}, doi = {10.1177/0146621616654597}, url = {http://apm.sagepub.com/content/40/7/469.abstract}, author = {Choi, Seung W. and Moellering, Karin T. and Li, Jie and van der Linden, Wim J.} } @article {2505, title = {Parameter Drift Detection in Multidimensional Computerized Adaptive Testing Based on Informational Distance/Divergence Measures}, journal = {Applied Psychological Measurement}, volume = {40}, number = {7}, year = {2016}, pages = {534-550}, abstract = {An informational distance/divergence-based approach is proposed to detect the presence of parameter drift in multidimensional computerized adaptive testing (MCAT). 
The study presents significance testing procedures for identifying changes in multidimensional item response functions (MIRFs) over time based on informational distance/divergence measures that capture the discrepancy between two probability functions. To approximate the MIRFs from the observed response data, the k-nearest neighbors algorithm is used with the random search method. A simulation study suggests that the distance/divergence-based drift measures perform effectively in identifying the instances of parameter drift in MCAT. They showed moderate power with small samples of 500 examinees and excellent power when the sample size was as large as 1,000. The proposed drift measures also adequately controlled for Type I error at the nominal level under the null hypothesis.}, doi = {10.1177/0146621616663676}, url = {http://apm.sagepub.com/content/40/7/534.abstract}, author = {Kang, Hyeon-Ah and Chang, Hua-Hua} } @article {2459, title = {Stochastic Curtailment of Questionnaires for Three-Level Classification: Shortening the CES-D for Assessing Low, Moderate, and High Risk of Depression}, journal = {Applied Psychological Measurement}, volume = {40}, number = {1}, year = {2016}, pages = {22-36}, abstract = {In clinical assessment, efficient screeners are needed to ensure low respondent burden. In this article, Stochastic Curtailment (SC), a method for efficient computerized testing for classification into two classes for observable outcomes, was extended to three classes. In a post hoc simulation study using the item scores on the Center for Epidemiologic Studies{\textendash}Depression Scale (CES-D) of a large sample, three versions of SC, SC via Empirical Proportions (SC-EP), SC via Simple Ordinal Regression (SC-SOR), and SC via Multiple Ordinal Regression (SC-MOR), were compared on both respondent burden and classification accuracy. All methods were applied under the regular item order of the CES-D and under an ordering that was optimal in terms of the predictive power of the items. Under the regular item ordering, the three methods were equally accurate, but SC-SOR and SC-MOR needed fewer items. Under the optimal ordering, additional gains in efficiency were found, but SC-MOR suffered substantially from capitalization on chance. It was concluded that SC-SOR is an efficient and accurate method for clinical screening. Strengths and weaknesses of the methods are discussed.}, doi = {10.1177/0146621615592294}, url = {http://apm.sagepub.com/content/40/1/22.abstract}, author = {Smits, Niels and Finkelman, Matthew D. and Kelderman, Henk} } @article {2482, title = {Using Response Time to Detect Item Preknowledge in Computer-Based Licensure Examinations}, journal = {Educational Measurement: Issues and Practice}, volume = {35}, number = {38{\textendash}47}, year = {2016}, abstract = {This article addresses the issue of how to detect item preknowledge using item response time data in two computer-based large-scale licensure examinations. Item preknowledge is indicated by an unexpected short response time and a correct response. Two samples were used for detecting item preknowledge for each examination. The first sample was from the early stage of the operational test and was used for item calibration. The second sample was from the late stage of the operational test, which may feature item preknowledge. The purpose of this research was to explore whether there was evidence of item preknowledge and compromised items in the second sample using the parameters estimated from the first sample. 
The results showed that for one nonadaptive operational examination, two items (of 111) were potentially exposed, and two candidates (of 1,172) showed some indications of preknowledge on multiple items. For another licensure examination that featured computerized adaptive testing, there was no indication of item preknowledge or compromised items. Implications for detected aberrant examinees and compromised items are discussed in the article.}, doi = {http://dx.doi.org/10.1111/emip.12102}, author = {Qian H. and Staniewska, D. and Reckase, M. and Woo, A.} } @article {2384, title = {Assessing Individual-Level Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {52}, number = {1}, year = {2015}, pages = {80{\textendash}105}, abstract = {With an increase in the number of online tests, the number of interruptions during testing due to unexpected technical issues seems to be on the rise. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees{\textquoteright} scores. Researchers such as Hill and Sinharay et~al. examined the impact of interruptions at an aggregate level. However, there is a lack of research on the assessment of impact of interruptions at an individual level. We attempt to fill that void. We suggest four methodological approaches, primarily based on statistical hypothesis testing, linear regression, and item response theory, which can provide evidence on the individual-level impact of interruptions. We perform a realistic simulation study to compare the Type~I error rate and power of the suggested approaches. We then apply the approaches to data from the 2013 Indiana Statewide Testing for Educational Progress-Plus~(ISTEP+) test that experienced interruptions.}, issn = {1745-3984}, doi = {10.1111/jedm.12064}, url = {http://dx.doi.org/10.1111/jedm.12064}, author = {Sinharay, Sandip and Wan, Ping and Choi, Seung W. and Kim, Dong-In} } @article {2387, title = {a-Stratified Computerized Adaptive Testing in the Presence of Calibration Error}, journal = {Educational and Psychological Measurement}, volume = {75}, number = {2}, year = {2015}, pages = {260-283}, abstract = {a-Stratified computerized adaptive testing with b-blocking (AST), as an alternative to the widely used maximum Fisher information (MFI) item selection method, can effectively balance item pool usage while providing accurate latent trait estimates in computerized adaptive testing (CAT). However, previous comparisons of these methods have treated item parameter estimates as if they are the true population parameter values. Consequently, capitalization on chance may occur. In this article, we examined the performance of the AST method under more realistic conditions where item parameter estimates instead of true parameter values are used in the CAT. Its performance was compared against that of the MFI method when the latter is used in conjunction with Sympson{\textendash}Hetter or randomesque exposure control. Results indicate that the MFI method, even when combined with exposure control, is susceptible to capitalization on chance. This is particularly true when the calibration sample size is small. On the other hand, AST is more robust to capitalization on chance. Consistent with previous investigations using true item parameter values, AST yields much more balanced item pool usage, with a small loss in the precision of latent trait estimates. 
The loss is negligible when the test is as long as 40 items.}, doi = {10.1177/0013164414530719}, url = {http://epm.sagepub.com/content/75/2/260.abstract}, author = {Cheng, Ying and Patton, Jeffrey M. and Shao, Can} } @article {2456, title = {Best Design for Multidimensional Computerized Adaptive Testing With the Bifactor Model}, journal = {Educational and Psychological Measurement}, volume = {75}, number = {6}, year = {2015}, pages = {954-978}, abstract = {Most computerized adaptive tests (CATs) have been studied using the framework of unidimensional item response theory. However, many psychological variables are multidimensional and might benefit from using a multidimensional approach to CATs. This study investigated the accuracy, fidelity, and efficiency of a fully multidimensional CAT algorithm (MCAT) with a bifactor model using simulated data. Four item selection methods in MCAT were examined for three bifactor pattern designs using two multidimensional item response theory models. To compare MCAT item selection and estimation methods, a fixed test length was used. The Ds-optimality item selection improved θ estimates with respect to a general factor, and either D- or A-optimality improved estimates of the group factors in three bifactor pattern designs under two multidimensional item response theory models. The MCAT model without a guessing parameter functioned better than the MCAT model with a guessing parameter. The MAP (maximum a posteriori) estimation method provided more accurate θ estimates than the EAP (expected a posteriori) method under most conditions, and MAP showed lower observed standard errors than EAP under most conditions, except for a general factor condition using Ds-optimality item selection.}, doi = {10.1177/0013164415575147}, url = {http://epm.sagepub.com/content/75/6/954.abstract}, author = {Seo, Dong Gi and Weiss, David J.} } @article {2355, title = {Comparing Simple Scoring With IRT Scoring of Personality Measures: The Navy Computer Adaptive Personality Scales}, journal = {Applied Psychological Measurement}, volume = {39}, number = {2}, year = {2015}, pages = {144-154}, abstract = {

This article analyzes data from U.S. Navy sailors (N = 8,956), with the central measure being the Navy Computer Adaptive Personality Scales (NCAPS). Analyses and results from this article extend and qualify those from previous research efforts by examining the properties of the NCAPS and its adaptive structure in more detail. Specifically, this article examines item exposure rates, the efficiency of item use based on item response theory (IRT){\textendash}based Expected A Posteriori (EAP) scoring, and a comparison of IRT-EAP scoring with much more parsimonious scoring methods that appear to work just as well (stem-level scoring and dichotomous scoring). The cutting-edge nature of adaptive personality testing will necessitate a series of future efforts like this: to examine the benefits of adaptive scoring schemes and novel measurement methods continually, while pushing testing technology further ahead.

}, doi = {10.1177/0146621614559517}, url = {http://apm.sagepub.com/content/39/2/144.abstract}, author = {Oswald, Frederick L. and Shaw, Amy and Farmer, William L.} } @article {2383, title = {A Comparison of IRT Proficiency Estimation Methods Under Adaptive Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {52}, number = {1}, year = {2015}, pages = {70{\textendash}79}, abstract = {This inquiry is an investigation of item response theory (IRT) proficiency estimators{\textquoteright} accuracy under multistage testing (MST). We chose a two-stage MST design that includes four modules (one at Stage 1, three at Stage 2) and three difficulty paths (low, middle, high). We assembled various two-stage MST panels (i.e., forms) by manipulating two assembly conditions in each module, such as difficulty level and module length. For each panel, we investigated the accuracy of examinees{\textquoteright} proficiency levels derived from seven IRT proficiency estimators. The choice of Bayesian (prior) versus non-Bayesian (no prior) estimators was of more practical significance than the choice of number-correct versus item-pattern scoring estimators. The Bayesian estimators were slightly more efficient than the non-Bayesian estimators, resulting in smaller overall error. Possible score changes caused by the use of different proficiency estimators would be nonnegligible, particularly for low- and high-performing examinees.}, issn = {1745-3984}, doi = {10.1111/jedm.12063}, url = {http://dx.doi.org/10.1111/jedm.12063}, author = {Kim, Sooyeon and Moses, Tim and Yoo, Hanwook (Henry)} } @article {2485, title = {Considering the Use of General and Modified Assessment Items in Computerized Adaptive Testing}, journal = {Applied Measurement in Education}, volume = {28}, number = {156{\textendash}167}, year = {2015}, abstract = {This article used several data sets from a large-scale state testing program to examine the feasibility of combining general and modified assessment items in computerized adaptive testing (CAT) for different groups of students. Results suggested that several of the assumptions made when employing this type of mixed-item CAT may not be met for students with disabilities that have typically taken alternate assessments based on modified achievement standards (AA-MAS). A simulation study indicated that the abilities of AA-MAS students can be underestimated or overestimated by the mixed-item CAT, depending on students{\textquoteright} location on the underlying ability scale. These findings held across grade levels and test lengths. The mixed-item CAT appeared to function well for non-AA-MAS students.}, doi = {http://dx.doi.org/10.1080/08957347.2014.1002921}, author = {Wyse, A. E. and Albano, A. D.} } @article {2453, title = {The Effect of Upper and Lower Asymptotes of IRT Models on Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {7}, year = {2015}, pages = {551-565}, abstract = {In this article, the effect of the upper and lower asymptotes in item response theory models on computerized adaptive testing is shown analytically. This is done by deriving the step size between adjacent latent trait estimates under the four-parameter logistic model (4PLM) and two models it subsumes, the usual three-parameter logistic model (3PLM) and the 3PLM with upper asymptote (3PLMU). 
The authors show analytically that the large effect of the discrimination parameter on the step size holds true for the 4PLM and the two models it subsumes under both the maximum information method and the b-matching method for item selection. Furthermore, the lower asymptote helps reduce the positive bias of ability estimates associated with early guessing, and the upper asymptote helps reduce the negative bias induced by early slipping. Relative step size between modeling versus not modeling the upper or lower asymptote under the maximum Fisher information method (MI) and the b-matching method is also derived. It is also shown analytically why the gain from early guessing is smaller than the loss from early slipping when the lower asymptote is modeled, and vice versa when the upper asymptote is modeled. The benefit to loss ratio is quantified under both the MI and the b-matching method. Implications of the analytical results are discussed.}, doi = {10.1177/0146621615585850}, url = {http://apm.sagepub.com/content/39/7/551.abstract}, author = {Cheng, Ying and Liu, Cheng} } @article {2484, title = {Evaluating Content Alignment in Computerized Adaptive Testing}, journal = {Educational Measurement: Issues and Practice}, volume = {34}, number = {41-48}, year = {2015}, abstract = {The alignment between a test and the content domain it measures represents key evidence for the validation of test score inferences. Although procedures have been developed for evaluating the content alignment of linear tests, these procedures are not readily applicable to computerized adaptive tests (CATs), which require large item pools and do not use fixed test forms. This article describes the decisions made in the development of CATs that influence and might threaten content alignment. It outlines a process for evaluating alignment that is sensitive to these threats and gives an empirical example of the process.}, doi = {http://dx.doi.org/10.1111/emip.12094}, author = {Wise, S. L. and Kingsbury, G. G. and Webb, N. L.} } @article {2455, title = {Implementing a CAT: The AMC Experience }, journal = {Journal of Computerized Adaptive Testing}, volume = {3}, year = {2015}, pages = {1-12}, type = {Applications and Implementations}, keywords = {adaptive, Assessment, computer, medical, online, Testing}, issn = {2165-6592}, doi = {10.7333/15100301001}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/52/25}, author = {Barnard, John J} } @article {2457, title = {Investigation of Response Changes in the GRE Revised General Test}, journal = {Educational and Psychological Measurement}, volume = {75}, number = {6}, year = {2015}, pages = {1002-1020}, abstract = {Research on examinees{\textquoteright} response changes on multiple-choice tests over the past 80 years has yielded some consistent findings, including that most examinees make score gains by changing answers. This study expands the research on response changes by focusing on a high-stakes admissions test{\textemdash}the Verbal Reasoning and Quantitative Reasoning measures of the GRE revised General Test. We analyzed data from 8,538 examinees for Quantitative and 9,140 for Verbal sections who took the GRE revised General Test in 12 countries. The analyses yielded findings consistent with prior research. In addition, as examinees{\textquoteright} ability increases, the benefit of response changing increases. The study yielded significant implications for both test agencies and test takers. 
Computer adaptive tests often do not allow the test takers to review and revise. Findings from this study confirm the benefit of such features.}, doi = {10.1177/0013164415573988}, url = {http://epm.sagepub.com/content/75/6/1002.abstract}, author = {Liu, Ou Lydia and Bridgeman, Brent and Gu, Lixiong and Xu, Jun and Kong, Nan} } @article {2393, title = {New Item Selection Methods for Cognitive Diagnosis Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {3}, year = {2015}, pages = {167-188}, abstract = {This article introduces two new item selection methods, the modified posterior-weighted Kullback{\textendash}Leibler index (MPWKL) and the generalized deterministic inputs, noisy {\textquotedblleft}and{\textquotedblright} gate (G-DINA) model discrimination index (GDI), that can be used in cognitive diagnosis computerized adaptive testing. The efficiency of the new methods is compared with the posterior-weighted Kullback{\textendash}Leibler (PWKL) item selection index using a simulation study in the context of the G-DINA model. The impact of item quality, generating models, and test termination rules on attribute classification accuracy or test length is also investigated. The results of the study show that the MPWKL and GDI perform very similarly, and have higher correct attribute classification rates or shorter mean test lengths compared with the PWKL. In addition, the GDI has the shortest implementation time among the three indices. The proportion of item usage with respect to the required attributes across the different conditions is also tracked and discussed.}, doi = {10.1177/0146621614554650}, url = {http://apm.sagepub.com/content/39/3/167.abstract}, author = {Kaplan, Mehmet and de la Torre, Jimmy and Barrada, Juan Ram{\'o}n} } @article {2348, title = {Online Item Calibration for Q-Matrix in CD-CAT}, journal = {Applied Psychological Measurement}, volume = {39}, number = {1}, year = {2015}, pages = {5-15}, abstract = {

Item replenishment is important for maintaining a large-scale item bank. In this article, the authors consider calibrating new items based on pre-calibrated operational items under the deterministic inputs, noisy-and-gate model, the specification of which includes the so-called Q-matrix, as well as the slipping and guessing parameters. Making use of the maximum likelihood and Bayesian estimators for the latent knowledge states, the authors propose two methods for the calibration. These methods are applicable to both traditional paper{\textendash}pencil{\textendash}based tests, for which the selection of operational items is prefixed, and computerized adaptive tests, for which the selection of operational items is sequential and random. Extensive simulations are done to assess and to compare the performance of these approaches. Extensions to other diagnostic classification models are also discussed.

}, doi = {10.1177/0146621613513065}, url = {http://apm.sagepub.com/content/39/1/5.abstract}, author = {Chen, Yunxiao and Liu, Jingchen and Ying, Zhiliang} } @article {2354, title = {On-the-Fly Assembled Multistage Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {2}, year = {2015}, pages = {104-118}, abstract = {

Recently, multistage testing (MST) has been adopted by several important large-scale testing programs and become popular among practitioners and researchers. Stemming from the decades of history of computerized adaptive testing (CAT), the rapidly growing MST alleviates several major problems of earlier CAT applications. Nevertheless, MST is only one among all possible solutions to these problems. This article presents a new adaptive testing design, {\textquotedblleft}on-the-fly assembled multistage adaptive testing{\textquotedblright} (OMST), which combines the benefits of CAT and MST and offsets their limitations. Moreover, OMST also provides some unique advantages over both CAT and MST. A simulation study was conducted to compare OMST with MST and CAT, and the results demonstrated the promising features of OMST. Finally, the {\textquotedblleft}Discussion{\textquotedblright} section provides suggestions on possible future adaptive testing designs based on the OMST framework, which could provide great flexibility for adaptive tests in the digital future and open an avenue for all types of hybrid designs based on the different needs of specific tests.

}, doi = {10.1177/0146621614544519}, url = {http://apm.sagepub.com/content/39/2/104.abstract}, author = {Zheng, Yi and Chang, Hua-Hua} } @article {2394, title = {Stochastic Curtailment in Adaptive Mastery Testing: Improving the Efficiency of Confidence Interval{\textendash}Based Stopping Rules}, journal = {Applied Psychological Measurement}, volume = {39}, number = {4}, year = {2015}, pages = {278-292}, abstract = {A well-known stopping rule in adaptive mastery testing is to terminate the assessment once the examinee{\textquoteright}s ability confidence interval lies entirely above or below the cut-off score. This article proposes new procedures that seek to improve such a variable-length stopping rule by coupling it with curtailment and stochastic curtailment. Under the new procedures, test termination can occur earlier if the probability is high enough that the current classification decision remains the same should the test continue. Computation of this probability utilizes normality of an asymptotically equivalent version of the maximum likelihood ability estimate. In two simulation sets, the new procedures showed a substantial reduction in average test length while maintaining similar classification accuracy to the original method.}, doi = {10.1177/0146621614561314}, url = {http://apm.sagepub.com/content/39/4/278.abstract}, author = {Sie, Haskell and Finkelman, Matthew D. and Bartroff, Jay and Thompson, Nathan A.} } @article {2483, title = {Using Out-of-Level Items in Computerized Adaptive Testing}, journal = {International Journal of Testing}, volume = {15}, number = {50-70}, year = {2015}, abstract = {Out-of-level testing refers to the practice of assessing a student with a test that is intended for students at a higher or lower grade level. Although the appropriateness of out-of-level testing for accountability purposes has been questioned by educators and policymakers, incorporating out-of-level items in formative assessments for accurate feedback is recommended. This study made use of a commercial item bank with vertically scaled items across grades and simulated student responses in a computerized adaptive testing (CAT) environment. Results of the study suggested that administration of out-of-level items improved measurement accuracy and test efficiency for students who perform significantly above or below their grade-level peers. This study has direct implications with regards to the relevance, applicability, and benefits of using out-of-level items in CAT.}, doi = {http://dx.doi.org/10.1080/15305058.2014.979492}, author = {Wei,H. and Lin,J.} } @article {2403, title = {Utilizing Response Times in Computerized Classification Testing}, journal = {Applied Psychological Measurement}, volume = {39}, number = {5}, year = {2015}, pages = {389-405}, abstract = {A well-known approach in computerized mastery testing is to combine the Sequential Probability Ratio Test (SPRT) stopping rule with item selection to maximize Fisher information at the mastery threshold. This article proposes a new approach in which a time limit is defined for the test and examinees{\textquoteright} response times are considered in both item selection and test termination. Item selection is performed by maximizing Fisher information per time unit, rather than Fisher information itself. The test is terminated once the SPRT makes a classification decision, the time limit is exceeded, or there is no remaining item that has a high enough probability of being answered before the time limit. 
In a simulation study, the new procedure showed a substantial reduction in average testing time while slightly improving classification accuracy compared with the original method. In addition, the new procedure reduced the percentage of examinees who exceeded the time limit.}, doi = {10.1177/0146621615569504}, url = {http://apm.sagepub.com/content/39/5/389.abstract}, author = {Sie, Haskell and Finkelman, Matthew D. and Riley, Barth and Smits, Niels} } @article {2405, title = {Variable-Length Computerized Adaptive Testing Using the Higher Order DINA Model}, journal = {Journal of Educational Measurement}, volume = {52}, number = {2}, year = {2015}, pages = {125{\textendash}143}, abstract = {Cognitive diagnosis models provide profile information about a set of latent binary attributes, whereas item response models yield a summary report on a latent continuous trait. To utilize the advantages of both models, higher order cognitive diagnosis models were developed in which information about both latent binary attributes and latent continuous traits is available. To facilitate the utility of cognitive diagnosis models, corresponding computerized adaptive testing (CAT) algorithms were developed. Most of them adopt the fixed-length rule to terminate CAT and are limited to ordinary cognitive diagnosis models. In this study, the higher order deterministic-input, noisy-and-gate (DINA) model was used as an example, and three criteria based on the minimum-precision termination rule were implemented: one for the latent class, one for the latent trait, and the other for both. The simulation results demonstrated that all of the termination criteria were successful when items were selected according to the Kullback-Leibler information and the posterior-weighted Kullback-Leibler information, and the minimum-precision rule outperformed the fixed-length rule with a similar test length in recovering the latent attributes and the latent trait.}, issn = {1745-3984}, doi = {10.1111/jedm.12069}, url = {http://dx.doi.org/10.1111/jedm.12069}, author = {Hsu, Chia-Ling and Wang, Wen-Chung} } @article {2351, title = {Cognitive Diagnostic Models and Computerized Adaptive Testing: Two New Item-Selection Methods That Incorporate Response Times}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, pages = {59-76}, doi = {10.7333/1412-0204059}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/43/21}, author = {Finkelman, M. D. and Kim, W. and Weissman, A. and Cook, R.J.} } @article {2336, title = {A Comparison of Four Item-Selection Methods for Severely Constrained CATs}, journal = {Educational and Psychological Measurement}, volume = {74}, number = {4}, year = {2014}, pages = {677-696}, abstract = {

This study compared four item-selection procedures developed for use with severely constrained computerized adaptive tests (CATs). Severely constrained CATs refer to those adaptive tests that seek to meet a complex set of constraints that are often not exclusive of one another (i.e., an item may contribute to the satisfaction of several constraints at the same time). The procedures examined in the study included the weighted deviation model (WDM), the weighted penalty model (WPM), the maximum priority index (MPI), and the shadow test approach (STA). In addition, two modified versions of the MPI procedure were introduced to deal with an edge case condition that results in the item selection procedure becoming dysfunctional during a test. The results suggest that the STA worked best among all candidate methods in terms of measurement accuracy and constraint management. The other three heuristic approaches did not differ significantly in measurement accuracy or in constraint management at the lower bound level. However, the WPM method appears to perform considerably better in overall constraint management than either the WDM or MPI method. Limitations and future research directions were also discussed.

}, doi = {10.1177/0013164413517503}, url = {http://epm.sagepub.com/content/74/4/677.abstract}, author = {He, Wei and Diao, Qi and Hauser, Carl} } @article {2322, title = {A Comparison of Multi-Stage and Linear Test Designs for Medium-Size Licensure and Certification Examinations}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, month = {02-2014}, pages = {18-36}, doi = {10.7333/1402-0202018}, author = {Brossman, Bradley. G. and Guille, R.A.} } @article {2339, title = {Computerized Adaptive Testing for the Random Weights Linear Logistic Test Model}, journal = {Applied Psychological Measurement}, volume = {38}, number = {6}, year = {2014}, pages = {415-431}, abstract = {

This article discusses four item-selection rules to design efficient individualized tests for the random weights linear logistic test model (RWLLTM): minimum posterior-weighted error, minimum expected posterior-weighted error, maximum expected Kullback{\textendash}Leibler divergence between subsequent posteriors (KLP), and maximum mutual information (MUI). The RWLLTM decomposes test items into a set of subtasks or cognitive features and assumes individual-specific effects of the features on the difficulty of the items. The model extends and improves the well-known linear logistic test model in which feature effects are only estimated at the aggregate level. Simulations show that the efficiencies of the designs obtained with the different criteria appear to be equivalent. However, KLP and MUI are given preference over the first two criteria due to their lesser complexity, which significantly reduces the computational burden.

}, doi = {10.1177/0146621614533987}, url = {http://apm.sagepub.com/content/38/6/415.abstract}, author = {Crabbe, Marjolein and Vandebroek, Martina} } @book {2480, title = {Computerized multistage testing: Theory and applications}, year = {2014}, publisher = {CRC Press}, organization = {CRC Press}, address = {Boca Raton FL}, isbn = {13-978-1-4665-0577-3}, author = {Duanli Yan and Alina A von Davier and Charles Lewis} } @article {2345, title = {Detecting Item Preknowledge in Computerized Adaptive Testing Using Information Theory and Combinatorial Optimization}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, pages = {37-58}, keywords = {combinatorial optimization, hypothesis testing, item preknowledge, Kullback-Leibler divergence, simulated annealing., test security}, issn = {2165-6592}, doi = {10.7333/1410-0203037}, url = {http://www.iacat.org/jcat/index.php/jcat/article/view/36/18}, author = {Belov, D. I.} } @article {2350, title = {Determining the Overall Impact of Interruptions During Online Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {419{\textendash}440}, abstract = {

With an increase in the number of online tests, interruptions during testing due to unexpected technical issues seem unavoidable. For example, interruptions occurred during several recent state tests. When interruptions occur, it is important to determine the extent of their impact on the examinees{\textquoteright} scores. There is a lack of research on this topic due to the novelty of the problem. This article is an attempt to fill that void. Several methods, primarily based on propensity score matching, linear regression, and item response theory, were suggested to determine the overall impact of the interruptions on the examinees{\textquoteright} scores. A realistic simulation study shows that the suggested methods have satisfactory Type I error rate and power. Then the methods were applied to data from the Indiana Statewide Testing for Educational Progress-Plus (ISTEP+) test that experienced interruptions in 2013. The results indicate that the interruptions did not have a significant overall impact on the student scores for the ISTEP+ test.

}, issn = {1745-3984}, doi = {10.1111/jedm.12052}, url = {http://dx.doi.org/10.1111/jedm.12052}, author = {Sinharay, Sandip and Wan, Ping and Whitaker, Mike and Kim, Dong-In and Zhang, Litong and Choi, Seung W.} } @article {2349, title = {An Enhanced Approach to Combine Item Response Theory With Cognitive Diagnosis in Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {51}, number = {4}, year = {2014}, pages = {358{\textendash}380}, abstract = {

Computerized adaptive testing offers the possibility of gaining information on both the overall ability and cognitive profile in a single assessment administration. Some algorithms aiming for these dual purposes have been proposed, including the shadow test approach, the dual information method (DIM), and the constraint weighted method. The current study proposed two new methods, the aggregate ranked information index (ARI) and the aggregate standardized information index (ASI), which appropriately addressed the noncompatibility issue inherent in the original DIM method. More flexible weighting schemes that put different emphasis on information about general ability (i.e., θ in item response theory) and information about cognitive profile (i.e., α in cognitive diagnostic modeling) were also explored. Two simulation studies were carried out to investigate the effectiveness of the new methods and weighting schemes. Results showed that the new methods with the flexible weighting schemes could produce more accurate estimation of both overall ability and cognitive profile than the original DIM. Among them, the ASI with both empirical and theoretical weights is recommended, and the attribute-level weighting scheme is preferred if some attributes are considered more important from a substantive perspective.

}, issn = {1745-3984}, doi = {10.1111/jedm.12057}, url = {http://dx.doi.org/10.1111/jedm.12057}, author = {Wang, Chun and Zheng, Chanjin and Chang, Hua-Hua} } @article {2332, title = {Enhancing Pool Utilization in Constructing the Multistage Test Using Mixed-Format Tests}, journal = {Applied Psychological Measurement}, volume = {38}, number = {4}, year = {2014}, pages = {268-280}, abstract = {

This study investigated a new pool utilization method for constructing multistage tests (MST) using mixed-format tests based on the generalized partial credit model (GPCM). MST simulations of a classification test were performed to evaluate the MST design. A linear programming (LP) model was applied to perform MST reassemblies based on the initial MST construction. Three subsequent MST reassemblies were performed. For each reassembly, three test unit replacement ratios (TRRs; 0.22, 0.44, and 0.66) were investigated. Three passing-rate conditions (30\%, 50\%, and 70\%) were also considered in the classification testing. The results demonstrated that various MST reassembly conditions increased the overall pool utilization rates, while maintaining the desired MST construction. All MST testing conditions performed equally well in terms of the precision of the classification decision.

}, doi = {10.1177/0146621613515545}, url = {http://apm.sagepub.com/content/38/4/268.abstract}, author = {Park, Ryoungsun and Kim, Jiseon and Chung, Hyewon and Dodd, Barbara G.} } @article {2325, title = {General Test Overlap Control: Improved Algorithm for CAT and CCT}, journal = {Applied Psychological Measurement}, volume = {38}, number = {3}, year = {2014}, pages = {229-244}, abstract = {

This article proposed a new online test overlap control algorithm that is an improvement of Chen{\textquoteright}s algorithm in controlling the general test overlap rate for item pooling among a group of examinees. Chen{\textquoteright}s algorithm is not very efficient in that it controls not only item pooling between the current examinee and prior examinees but also item pooling among previous examinees, which would already have been controlled for when those examinees were current. The proposed improvement increases efficiency by considering only item pooling between current and previous examinees, and its improved performance over Chen{\textquoteright}s algorithm is demonstrated in a simulated computerized adaptive testing (CAT) environment. Moreover, the proposed algorithm is adapted for computerized classification testing (CCT) using the sequential probability ratio test procedure and is evaluated against some existing exposure control procedures. The proposed algorithm appears to work best in controlling general test overlap rate among the exposure control procedures examined, without sacrificing much classification precision, though longer tests might be required for more stringent control of item pooling among larger groups. Given the capability of the proposed algorithm in controlling item pooling among a group of examinees of any size and its ease of implementation, it appears to be a good test overlap control method.

}, doi = {10.1177/0146621613513494}, url = {http://apm.sagepub.com/content/38/3/229.abstract}, author = {Chen, Shu-Ying and Lei, Pui-Wa and Chen, Jyun-Hong and Liu, Tzu-Chen} } @article {2353, title = {Improving Measurement Precision of Hierarchical Latent Traits Using Adaptive Testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {39}, number = {6}, year = {2014}, pages = {452-477}, abstract = {

Many latent traits in the social sciences, such as intelligence, cognitive ability, and personality, display a hierarchical structure. Usually a second-order factor is linearly related to a group of first-order factors (also called domain abilities in cognitive ability measures), and the first-order factors directly govern the actual item responses. Because only a subtest of items is used to measure each domain, the lack of sufficient reliability becomes the primary impediment to generating and reporting domain abilities. In recent years, several item response theory (IRT) models have been proposed to account for hierarchical factor structures, and these models are also shown to alleviate the low reliability issue by using in-test collateral information to improve measurement precision. This article advocates using adaptive item selection together with a higher order IRT model to further increase the reliability of hierarchical latent trait estimation. Two item selection algorithms are proposed: the constrained D-optimal method and the sequencing domain method. Both are shown to yield improved measurement precision compared to unidimensional item selection (treating each dimension separately). The improvement is more prominent when the test length is short and when the correlation between dimensions is high (e.g., higher than .64). Moreover, two reliability indices for hierarchical latent traits are discussed and their use for quantifying the reliability of hierarchical traits measured by adaptive testing is demonstrated.

}, doi = {10.3102/1076998614559419}, url = {http://jeb.sagepub.com/cgi/content/abstract/39/6/452}, author = {Wang, Chun} } @article {2334, title = {Item Pool Design for an Operational Variable-Length Computerized Adaptive Test}, journal = {Educational and Psychological Measurement}, volume = {74}, number = {3}, year = {2014}, pages = {473-494}, abstract = {

For computerized adaptive tests (CATs) to work well, they must have an item pool with a sufficient number of good-quality items. Many researchers have pointed out that, in developing item pools for CATs, not only is the item pool size important but so are the distribution of item parameters and practical considerations such as content distribution and item exposure issues. Yet, there is little research on how to design item pools to have those desirable features. The research reported in this article provided step-by-step, hands-on guidance on the item pool design process by applying the bin-and-union method to design item pools for a large-scale licensure CAT employing a complex adaptive testing algorithm with variable test length determined by a stopping rule, content balancing, and exposure control. The design process involved extensive simulations to identify several alternative item pool designs and evaluate their performance against a series of criteria. The design output included the desired item pool size and item parameter distribution. The results indicate that the mechanism used to identify the desirable item pool features functions well and that two recommended item pool designs would support satisfactory performance of the operational testing program.

}, doi = {10.1177/0013164413509629}, url = {http://epm.sagepub.com/content/74/3/473.abstract}, author = {He, Wei and Reckase, Mark D.} } @article {2326, title = {Item Selection Methods Based on Multiple Objective Approaches for Classifying Respondents Into Multiple Levels}, journal = {Applied Psychological Measurement}, volume = {38}, number = {3}, year = {2014}, pages = {187-200}, abstract = {

Computerized classification tests classify examinees into two or more levels while maximizing accuracy and minimizing test length. The majority of currently available item selection methods maximize information at one point on the ability scale, but in a test with multiple cutting points, selection methods could take all these points into account simultaneously. If one objective is specified for each cutting point, the objectives can be combined into one optimization function using multiple objective approaches. Simulation studies were used to compare the efficiency and accuracy of eight selection methods in a test based on the sequential probability ratio test. Small differences in accuracy and efficiency were found between the methods, depending on the item pool and the settings of the classification method. The size of the indifference region had little influence on accuracy but considerable influence on efficiency. Content and exposure control had little influence on accuracy and efficiency.

}, doi = {10.1177/0146621613509723}, url = {http://apm.sagepub.com/content/38/3/187.abstract}, author = {van Groen, Maaike M. and Eggen, Theo J. H. M. and Veldkamp, Bernard P.} } @article {2324, title = {Multidimensional CAT Item Selection Methods for Domain Scores and Composite Scores With Item Exposure Control and Content Constraints}, journal = {Journal of Educational Measurement}, volume = {51}, number = {1}, year = {2014}, pages = {18{\textendash}38}, abstract = {

The intent of this research was to find an item selection procedure in the multidimensional computer adaptive testing (CAT) framework that yielded higher precision for both the domain and composite abilities, had a higher usage of the item pool, and controlled the exposure rate. Five multidimensional CAT item selection procedures (minimum angle; volume; minimum error variance of the linear combination; minimum error variance of the composite score with optimized weight; and Kullback-Leibler information) were studied and compared with two methods for item exposure control (the Sympson-Hetter procedure and the fixed-rate procedure, the latter simply referring to putting a limit on the item exposure rate) using simulated data. The maximum priority index method was used for the content constraints. Results showed that the Sympson-Hetter procedure yielded better precision than the fixed-rate procedure but had much lower item pool usage and took more time. The five item selection procedures performed similarly under Sympson-Hetter. For the fixed-rate procedure, there was a trade-off between the precision of the ability estimates and the item pool usage: the five procedures had different patterns. It was found that (1) Kullback-Leibler had better precision but lower item pool usage; (2) minimum angle and volume had balanced precision and item pool usage; and (3) the two methods minimizing the error variance had the best item pool usage and comparable overall score recovery but less precision for certain domains. The priority index for content constraints and item exposure was implemented successfully.

}, issn = {1745-3984}, doi = {10.1111/jedm.12032}, url = {http://dx.doi.org/10.1111/jedm.12032}, author = {Yao, Lihua} } @article {2333, title = {A Numerical Investigation of the Recovery of Point Patterns With Minimal Information}, journal = {Applied Psychological Measurement}, volume = {38}, number = {4}, year = {2014}, pages = {329-335}, abstract = {

A method has been proposed (Tsogo et al., 2001) to reconstruct the geometrical configuration of a large point set using minimal information. This paper employs numerical examples to investigate the proposed procedure. The suggested method has two great advantages. It reduces the volume of the data collection exercise and eases the computational effort involved in analyzing the data. It is suggested, however, that the method, while possibly providing a useful starting point for a solution, is not a panacea.

}, doi = {10.1177/0146621613516186}, url = {http://apm.sagepub.com/content/38/4/329.abstract}, author = {Cox, M. A. A.} } @article {2352, title = {The Sequential Probability Ratio Test and Binary Item Response Models}, journal = {Journal of Educational and Behavioral Statistics}, volume = {39}, number = {3}, year = {2014}, pages = {203-230}, abstract = {

The sequential probability ratio test (SPRT) is a common method for terminating item response theory (IRT)-based adaptive classification tests. To decide whether a classification test should stop, the SPRT compares a simple log-likelihood ratio, based on the classification bound separating two categories, to prespecified critical values. As has been previously noted (Spray \& Reckase, 1994), the SPRT test statistic is not necessarily monotonic with respect to the classification bound when item response functions have nonzero lower asymptotes. Because of nonmonotonicity, several researchers (including Spray \& Reckase, 1994) have recommended selecting items at the classification bound rather than at the current ability estimate when terminating SPRT-based classification tests. Unfortunately, this well-worn advice is a bit too simplistic. Items yielding optimal evidence for classification depend on the IRT model, item parameters, and location of an examinee with respect to the classification bound. The current study illustrates, in depth, the relationship between the SPRT test statistic and classification evidence in binary IRT models. Unlike earlier studies, we examine the form of the SPRT-based log-likelihood ratio while altering the classification bound and item difficulty. These investigations motivate a novel item selection algorithm based on optimizing the expected SPRT criterion given the current ability estimate. The new expected log-likelihood ratio algorithm results in test lengths noticeably shorter than those of current, commonly used algorithms, with no loss in classification accuracy.

}, doi = {10.3102/1076998614524824}, url = {http://jeb.sagepub.com/cgi/content/abstract/39/3/203}, author = {Nydick, Steven W.} } @article {2321, title = {A Sequential Procedure for Detecting Compromised Items in the Item Pool of a CAT System}, journal = {Applied Psychological Measurement}, volume = {38}, number = {2}, year = {2014}, pages = {87-104}, abstract = {

To maintain the validity of a continuous testing system, such as computerized adaptive testing (CAT), items should be monitored to ensure that their performance has not changed significantly during their lifetime in the item pool. In this article, the author developed a sequential monitoring procedure based on a series of statistical hypothesis tests to examine whether the statistical characteristics of individual items have changed significantly during test administration. Simulation studies show that under the simulated setting, by choosing an appropriate cutoff point, the procedure can control the rate of Type I errors at any reasonable significance level while maintaining a very low rate of Type II errors.

}, doi = {10.1177/0146621613510062}, url = {http://apm.sagepub.com/content/38/2/87.abstract}, author = {Zhang, Jinming} } @article {2341, title = {Stratified Item Selection and Exposure Control in Unidimensional Adaptive Testing in the Presence of Two-Dimensional Data}, journal = {Applied Psychological Measurement}, volume = {38}, number = {7}, year = {2014}, pages = {563-576}, abstract = {

It is not uncommon to use unidimensional item response theory models to estimate ability in multidimensional data with computerized adaptive testing (CAT). The current Monte Carlo study investigated the penalty of this model misspecification in CAT implementations using different item selection methods and exposure control strategies. Three item selection methods (maximum information [MAXI], a-stratification [STRA], and a-stratification with b-blocking [STRB]), each with and without the Sympson{\textendash}Hetter (SH) exposure control strategy, were investigated. Calibrating multidimensional items as unidimensional items resulted in inaccurate item parameter estimates. Therefore, MAXI performed better than STRA and STRB in estimating the ability parameters. However, all three methods had relatively large standard errors. SH exposure control had no impact on the number of overexposed items. Existing unidimensional CAT implementations might consider using MAXI only if recalibration with a multidimensional model is too expensive. Otherwise, building a CAT pool by calibrating multidimensional data as unidimensional is not recommended.

}, doi = {10.1177/0146621614536768}, url = {http://apm.sagepub.com/content/38/7/563.abstract}, author = {Kalinowski, Kevin E. and Natesan, Prathiba and Henson, Robin K.} } @article {2344, title = {Using Multidimensional CAT to Administer a Short, Yet Precise, Screening Test}, journal = {Applied Psychological Measurement}, volume = {38}, number = {8}, year = {2014}, pages = {614-631}, abstract = {

Multidimensional computerized adaptive testing (MCAT) provides a mechanism by which the simultaneous goals of accurate prediction and minimal testing time for a screening test could both be met. This article demonstrates the use of MCAT to administer a screening test for the Computerized Adaptive Testing{\textendash}Armed Services Vocational Aptitude Battery (CAT-ASVAB) under a variety of manipulated conditions. CAT-ASVAB is a test battery administered via unidimensional CAT (UCAT) that is used to qualify applicants for entry into the U.S. military and assign them to jobs. The primary research question being evaluated is whether the use of MCAT to administer a screening test can lead to significant reductions in testing time from the full-length selection test, without significant losses in score precision. Different stopping rules, item selection methods, content constraints, time constraints, and population distributions for the MCAT administration are evaluated through simulation, and compared with results from a regular full-length UCAT administration.

}, doi = {10.1177/0146621614541514}, url = {http://apm.sagepub.com/content/38/8/614.abstract}, author = {Yao, Lihua and Pommerich, Mary and Segall, Daniel O.} } @article {2317, title = {The Utility of Adaptive Testing in Addressing the Problem of Unmotivated Examinees}, journal = {Journal of Computerized Adaptive Testing}, volume = {2}, year = {2014}, pages = {1-17}, doi = {10.7333/1401-02010001}, author = {Steven L. Wise} } @article {2294, title = {The Applicability of Multidimensional Computerized Adaptive Testing for Cognitive Ability Measurement in Organizational Assessment}, journal = {International Journal of Testing}, volume = {13}, number = {2}, year = {2013}, pages = {123-139}, doi = {10.1080/15305058.2012.672352}, url = {http://www.tandfonline.com/doi/abs/10.1080/15305058.2012.672352}, author = {Makransky, Guido and Glas, Cees A. W.} } @article {2303, title = {The Application of the Monte Carlo Approach to Cognitive Diagnostic Computerized Adaptive Testing With Content Constraints}, journal = {Applied Psychological Measurement}, volume = {37}, number = {6}, year = {2013}, pages = {482-496}, abstract = {

The Monte Carlo approach, which has previously been implemented in traditional computerized adaptive testing (CAT), is applied here to cognitive diagnostic CAT to test the ability of this approach to address multiple content constraints. The performance of the Monte Carlo approach is compared with the performance of the modified maximum global discrimination index (MMGDI) method on simulations in which the only content constraint is on the number of items that measure each attribute. The results of the two simulation experiments show that (a) the Monte Carlo method fulfills all the test requirements and produces satisfactory measurement precision and item exposure results and (b) the Monte Carlo method outperforms the MMGDI method when the Monte Carlo method applies either the posterior-weighted Kullback{\textendash}Leibler algorithm or the hybrid Kullback{\textendash}Leibler information as the item selection index. Overall, the recovery rate of the knowledge states, the distribution of the item exposure, and the utilization rate of the item bank are improved when the Monte Carlo method is used.

}, doi = {10.1177/0146621613486015}, url = {http://apm.sagepub.com/content/37/6/482.abstract}, author = {Mao, Xiuzhen and Xin, Tao} } @article {2265, title = {Comparing the Performance of Five Multidimensional CAT Selection Procedures With Different Stopping Rules}, journal = {Applied Psychological Measurement}, volume = {37}, number = {1}, year = {2013}, pages = {3-23}, abstract = {

Through simulated data, five multidimensional computerized adaptive testing (MCAT) selection procedures with varying test lengths are examined and compared using different stopping rules. Fixed item exposure rates are used for all the items, and the Priority Index (PI) method is used for the content constraints. Two stopping rules, standard error (SE) and predicted standard error reduction (PSER), are proposed; each MCAT selection process is stopped if either the required precision has been achieved or the selected number of items has reached the maximum limit. The five procedures are as follows: minimum angle (Ag), volume (Vm), minimum error variance of the linear combination (V1), minimum error variance of the composite score with the optimized weight (V2), and Kullback{\textendash}Leibler (KL) information. The recovery for the domain scores or content scores and their overall score, test length, and test reliability are compared across the five MCAT procedures and between the two stopping rules. It is found that the two stopping rules are implemented successfully and that KL uses the fewest items to reach the same precision level, followed by Vm; Ag uses the largest number of items. On average, to reach a precision of SE = .35 under the SE stopping rule, 40, 55, 63, 63, and 82 items are needed for KL, Vm, V1, V2, and Ag, respectively. PSER yields 38, 45, 53, 58, and 68 items for KL, Vm, V1, V2, and Ag, respectively; PSER yields only slightly worse results than SE, but with far fewer items. Overall, KL is recommended for varying-length MCAT.

}, doi = {10.1177/0146621612455687}, url = {http://apm.sagepub.com/content/37/1/3.abstract}, author = {Yao, Lihua} } @article {2287, title = {A Comparison of Computerized Classification Testing and Computerized Adaptive Testing in Clinical Psychology}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {19-37}, doi = {10.7333/1302-0102019}, author = {Smits, N. and Finkelman, M. D.} } @article {2293, title = {A Comparison of Exposure Control Procedures in CAT Systems Based on Different Measurement Models for Testlets}, journal = {Applied Measurement in Education}, volume = {26}, number = {2}, year = {2013}, pages = {113-135}, doi = {10.1080/08957347.2013.765434}, url = {http://www.tandfonline.com/doi/abs/10.1080/08957347.2013.765434}, author = {Boyd, Aimee M. and Dodd, Barbara and Fitzpatrick, Steven} } @article {2304, title = {A Comparison of Exposure Control Procedures in CATs Using the 3PL Model}, journal = {Educational and Psychological Measurement}, volume = {73}, number = {5}, year = {2013}, pages = {857-874}, abstract = {

This study compares the progressive-restricted standard error (PR-SE) exposure control procedure to three commonly used procedures in computerized adaptive testing: the randomesque, Sympson{\textendash}Hetter (SH), and no-exposure-control methods. The performance of these four procedures is evaluated using the three-parameter logistic model under the manipulated conditions of item pool size (small vs. large) and stopping rules (fixed-length vs. variable-length). PR-SE provides the advantage of constraints similar to those of SH, without the need for a preceding simulation study to execute it. Overall, for both the large and small item banks, the PR-SE method administered almost all of the items from the item pool, whereas the other procedures administered about 52\% or less of the large item bank and 80\% or less of the small item bank. The PR-SE yielded the smallest amount of item overlap between tests across conditions and administered fewer items on average than SH. PR-SE obtained these results with similar, and acceptable, measurement precision compared to the other exposure control procedures while vastly improving item pool usage.

}, doi = {10.1177/0013164413486802}, url = {http://epm.sagepub.com/content/73/5/857.abstract}, author = {Leroux, Audrey J. and Lopez, Myriam and Hembry, Ian and Dodd, Barbara G.} } @article {2315, title = {A Comparison of Four Methods for Obtaining Information Functions for Scores From Computerized Adaptive Tests With Normally Distributed Item Difficulties and Discriminations}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {88-107}, doi = {10.7333/1312-0105088}, author = {Ito, K. and Segall, D.O.} } @article {2281, title = {Deriving Stopping Rules for Multidimensional Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {37}, number = {2}, year = {2013}, pages = {99-122}, abstract = {

Multidimensional computerized adaptive testing (MCAT) is able to provide a vector of ability estimates for each examinee, which could be used to provide a more informative profile of an examinee{\textquoteright}s performance. The current literature on MCAT focuses on fixed-length tests, which can generate less accurate results for those examinees whose abilities are quite different from the average difficulty level of the item bank when there are only a limited number of items in the item bank. Therefore, instead of stopping the test with a predetermined fixed test length, the authors use a more informative stopping criterion that is directly related to measurement accuracy. Specifically, this research derives four stopping rules that either quantify the measurement precision of the ability vector (i.e., minimum determinant rule [D-rule], minimum eigenvalue rule [E-rule], and maximum trace rule [T-rule]) or quantify the amount of available information carried by each item (i.e., maximum Kullback{\textendash}Leibler divergence rule [K-rule]). The simulation results showed that all four stopping rules successfully terminated the test when the mean squared error of ability estimation was within a desired range, regardless of examinees{\textquoteright} true abilities. It was found that when using the D-, E-, or T-rule, examinees with extreme abilities tended to have tests that were twice as long as the tests received by examinees with moderate abilities. However, the test length difference with the K-rule is not very dramatic, indicating that the K-rule may not be very sensitive to measurement precision. In all cases, the cutoff value for each stopping rule needs to be adjusted on a case-by-case basis to find an optimal solution.

}, doi = {10.1177/0146621612463422}, url = {http://apm.sagepub.com/content/37/2/99.abstract}, author = {Wang, Chun and Chang, Hua-Hua and Boughton, Keith A.} } @article {2316, title = {Estimating Measurement Precision in Reduced-Length Multi-Stage Adaptive Testing }, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {67-87}, doi = {10.7333/1309-0104067}, author = {Crotts, K.M. and Zenisky, A. L. and Sireci, S.G. and Li, X.} } @article {2266, title = {The Influence of Item Calibration Error on Variable-Length Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {37}, number = {1}, year = {2013}, pages = {24-40}, abstract = {

Variable-length computerized adaptive testing (VL-CAT) allows both items and test length to be {\textquotedblleft}tailored{\textquotedblright} to examinees, thereby achieving the measurement goal (e.g., scoring precision or classification) with as few items as possible. Several popular test termination rules depend on the standard error of the ability estimate, which in turn depends on the item parameter values. However, items are chosen on the basis of their parameter estimates, and capitalization on chance may occur. In this article, the authors investigated the effects of capitalization on chance on test length and classification accuracy in several VL-CAT simulations. The results confirm that capitalization on chance occurs in VL-CAT and has complex effects on test length, ability estimation, and classification accuracy. These results have important implications for the design and implementation of VL-CATs.

}, doi = {10.1177/0146621612461727}, url = {http://apm.sagepub.com/content/37/1/24.abstract}, author = {Patton, Jeffrey M. and Ying Cheng, and Yuan, Ke-Hai and Diao, Qi} } @article {2296, title = {Integrating Test-Form Formatting Into Automated Test Assembly}, journal = {Applied Psychological Measurement}, volume = {37}, number = {5}, year = {2013}, pages = {361-374}, abstract = {

Automated test assembly uses the methodology of mixed integer programming to select an optimal set of items from an item bank. Automated test-form generation uses the same methodology to optimally order the items and format the test form. From an optimization point of view, production of fully formatted test forms directly from the item pool using a simultaneous optimization model is more attractive than any of the current, more time-consuming two-stage processes. The goal of this study was to provide such simultaneous models for both computer-delivered and paper forms, as well as to explore their performance relative to two-stage optimization. Empirical examples are presented to show that it is possible to automatically produce fully formatted optimal test forms directly from item pools of up to some 2,000 items on a regular PC in realistic time.

}, doi = {10.1177/0146621613476157}, url = {http://apm.sagepub.com/content/37/5/361.abstract}, author = {Diao, Qi and van der Linden, Wim J.} } @article {2288, title = {Item Ordering in Stochastically Curtailed Health Questionnaires With an Observable Outcome}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2013}, pages = {38-66}, doi = {10.7333/1304-0103038}, author = {Finkelman, M. D. and Kim, W. and He, Y. and Lai, A.M.} } @article {2297, title = {Item Pocket Method to Allow Response Review and Change in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {37}, number = {4}, year = {2013}, pages = {259-275}, abstract = {

Most computerized adaptive testing (CAT) programs do not allow test takers to review and change their responses because it could seriously deteriorate the efficiency of measurement and make tests vulnerable to manipulative test-taking strategies. Several modified testing methods have been developed that provide restricted review options while limiting the trade-off in CAT efficiency. The extent to which these methods provided test takers with options to review test items, however, still was quite limited. This study proposes the item pocket (IP) method, a new testing approach that allows test takers greater flexibility in changing their responses by eliminating restrictions that prevent them from moving across test sections to review their answers. A series of simulations were conducted to evaluate the robustness of the IP method against various manipulative test-taking strategies. Findings and implications of the study suggest that the IP method may be an effective solution for many CAT programs when the IP size and test time limit are properly set.

}, doi = {10.1177/0146621612473638}, url = {http://apm.sagepub.com/content/37/4/259.abstract}, author = {Han, Kyung T.} } @article {2320, title = {Longitudinal Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {50}, number = {4}, year = {2013}, pages = {447{\textendash}468}, abstract = {

This article introduces longitudinal multistage testing (lMST), a special form of multistage testing (MST), as a method for adaptive testing in longitudinal large-scale studies. In lMST designs, test forms of different difficulty levels are used, whereas the values on a pretest determine the routing to these test forms. Since lMST allows for testing in paper and pencil mode, lMST may represent an alternative to conventional testing (CT) in assessments for which other adaptive testing designs are not applicable. In this article the performance of lMST is compared to CT in terms of test targeting as well as bias and efficiency of ability and change estimates. Using a simulation study, the effect of the stability of ability across waves, the difficulty level of the different test forms, and the number of link items between the test forms were investigated.

}, issn = {1745-3984}, doi = {10.1111/jedm.12028}, url = {http://dx.doi.org/10.1111/jedm.12028}, author = {Pohl, Steffi} } @article {2311, title = {Mutual Information Item Selection Method in Cognitive Diagnostic Computerized Adaptive Testing With Short Test Length}, journal = {Educational and Psychological Measurement}, volume = {73}, number = {6}, year = {2013}, pages = {1017-1035}, abstract = {

Cognitive diagnostic computerized adaptive testing (CD-CAT) purports to combine the strengths of both CAT and cognitive diagnosis. Cognitive diagnosis models aim at classifying examinees into the correct mastery profile group so as to pinpoint the strengths and weaknesses of each examinee, whereas CAT algorithms choose items to determine those strengths and weaknesses as efficiently as possible. Most of the existing CD-CAT item selection algorithms are evaluated when test length is relatively long, whereas several applications of CD-CAT, such as in interim assessment, require an item selection algorithm that is able to accurately recover examinees’ mastery profiles with short test length. In this article, we introduce the mutual information item selection method in the context of CD-CAT and then provide a computationally easier formula to make the method more amenable in real time. Mutual information is then evaluated against common item selection methods, such as Kullback–Leibler information, posterior weighted Kullback–Leibler information, and Shannon entropy. Based on our simulations, mutual information consistently results in nearly the highest attribute and pattern recovery rate in more than half of the conditions. We conclude by discussing how the number of attributes, Q-matrix structure, correlations among the attributes, and item quality affect estimation accuracy.

}, doi = {10.1177/0013164413498256}, url = {http://epm.sagepub.com/content/73/6/1017.abstract}, author = {Wang, Chun} } @article {2292, title = {The Philosophical Aspects of IRT Equating: Modeling Drift to Evaluate Cohort Growth in Large-Scale Assessments}, journal = {Educational Measurement: Issues and Practice}, volume = {32}, number = {1}, year = {2013}, pages = {2{\textendash}14}, keywords = {cohort growth, construct-relevant drift, evaluation of scale drift, philosophical aspects of IRT equating}, issn = {1745-3992}, doi = {10.1111/emip.12000}, url = {http://dx.doi.org/10.1111/emip.12000}, author = {Taherbhai, Husein and Seo, Daeryong} } @article {2283, title = {The Random-Threshold Generalized Unfolding Model and Its Application of Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {37}, number = {3}, year = {2013}, pages = {179-200}, abstract = {

The random-threshold generalized unfolding model (RTGUM) was developed by treating the thresholds in the generalized unfolding model as random effects rather than fixed effects to account for the subjective nature of the selection of categories in Likert items. The parameters of the new model can be estimated with the JAGS (Just Another Gibbs Sampler) freeware, which adopts a Bayesian approach for estimation. A series of simulations was conducted to evaluate the parameter recovery of the new model and the consequences of ignoring the randomness in thresholds. The results showed that the parameters of RTGUM were recovered fairly well and that ignoring the randomness in thresholds led to biased estimates. Computerized adaptive testing was also implemented on RTGUM, where the Fisher information criterion was used for item selection and the maximum a posteriori method was used for ability estimation. The simulation study showed that the longer the test length, the smaller the randomness in thresholds, and the more categories in an item, the more precise the ability estimates would be.

}, doi = {10.1177/0146621612469720}, url = {http://apm.sagepub.com/content/37/3/179.abstract}, author = {Wang, Wen-Chung and Liu, Chen-Wei and Wu, Shiu-Lien} } @inbook {2273, title = {Reporting differentiated literacy results in PISA by using multidimensional adaptive testing. }, booktitle = {Research on PISA.}, year = {2013}, publisher = {Dodrecht: Springer}, organization = {Dodrecht: Springer}, author = {Frey, A. and Seitz, N-N. and Kr{\"o}hne, U.} } @article {2312, title = {A Semiparametric Model for Jointly Analyzing Response Times and Accuracy in Computerized Testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {38}, number = {4}, year = {2013}, pages = {381-417}, abstract = {

The item response times (RTs) collected from computerized testing represent an underutilized type of information about items and examinees. In addition to knowing the examinees’ responses to each item, we can investigate the amount of time examinees spend on each item. Current models for RTs mainly focus on parametric models, which have the advantage of conciseness, but may suffer from reduced flexibility to fit real data. We propose a semiparametric approach, specifically, the Cox proportional hazards model with a latent speed covariate to model the RTs, embedded within the hierarchical framework proposed by van der Linden to model the RTs and response accuracy simultaneously. This semiparametric approach combines the flexibility of nonparametric modeling and the brevity and interpretability of the parametric modeling. A Markov chain Monte Carlo method for parameter estimation is given and may be used with sparse data obtained by computerized adaptive testing. Both simulation studies and real data analysis are carried out to demonstrate the applicability of the new model.

}, doi = {10.3102/1076998612461831}, url = {http://jeb.sagepub.com/cgi/content/abstract/38/4/381}, author = {Wang, Chun and Fan, Zhewen and Chang, Hua-Hua and Douglas, Jeffrey A.} } @article {2313, title = {Speededness and Adaptive Testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {38}, number = {4}, year = {2013}, pages = {418-438}, abstract = {

Two simple constraints on the item parameters in a response-time model are proposed to control the speededness of an adaptive test. As the constraints are additive, they can easily be included in the constraint set for a shadow-test approach (STA) to adaptive testing. Alternatively, a simple heuristic is presented to control speededness in plain adaptive testing without any constraints. Both types of control are easy to implement and do not require any other real-time parameter estimation during the test than the regular update of the test taker’s ability estimate. Evaluation of the two approaches using simulated adaptive testing showed that the STA was especially effective. It guaranteed testing times that differed less than 10 seconds from a reference test across a variety of conditions.

}, doi = {10.3102/1076998612466143}, url = {http://jeb.sagepub.com/cgi/content/abstract/38/4/418}, author = {van der Linden, Wim J. and Xiong, Xinhui} } @article {2282, title = {Uncertainties in the Item Parameter Estimates and Robust Automated Test Assembly}, journal = {Applied Psychological Measurement}, volume = {37}, number = {2}, year = {2013}, pages = {123-139}, abstract = {

Item response theory parameters have to be estimated, and because of the estimation process, they do have uncertainty in them. In most large-scale testing programs, the parameters are stored in item banks, and automated test assembly algorithms are applied to assemble operational test forms. These algorithms treat item parameters as fixed values, and uncertainty is not taken into account. As a consequence, resulting tests might be off target or less informative than expected. In this article, the process of parameter estimation is described to provide insight into the causes of uncertainty in the item parameters. The consequences of uncertainty are studied. Besides, an alternative automated test assembly algorithm is presented that is robust against uncertainties in the data. Several numerical examples demonstrate the performance of the robust test assembly algorithm, and illustrate the consequences of not taking this uncertainty into account. Finally, some recommendations about the use of robust test assembly and some directions for further research are given.

}, doi = {10.1177/0146621612469825}, url = {http://apm.sagepub.com/content/37/2/123.abstract}, author = {Veldkamp, Bernard P. and Matteucci, Mariagiulia and de Jong, Martijn G.} } @article {2306, title = {Variable-Length Computerized Adaptive Testing Based on Cognitive Diagnosis Models}, journal = {Applied Psychological Measurement}, volume = {37}, number = {7}, year = {2013}, pages = {563-582}, abstract = {

Interest in developing computerized adaptive testing (CAT) under cognitive diagnosis models (CDMs) has increased recently. CAT algorithms that use a fixed-length termination rule frequently lead to different degrees of measurement precision for different examinees. Fixed precision, in which the examinees receive the same degree of measurement precision, is a major advantage of CAT over nonadaptive testing. In addition to the precision issue, test security is another important issue in practical CAT programs. In this study, the authors implemented two termination criteria for the fixed-precision rule and evaluated their performance under two popular CDMs using simulations. The results showed that using the two criteria with the posterior-weighted Kullback–Leibler information procedure for selecting items could achieve the prespecified measurement precision. A control procedure was developed to control item exposure and test overlap simultaneously among examinees. The simulation results indicated that in contrast to no method of controlling exposure, the control procedure developed in this study could maintain item exposure and test overlap at the prespecified level at the expense of only a few more items.

}, doi = {10.1177/0146621613488642}, url = {http://apm.sagepub.com/content/37/7/563.abstract}, author = {Hsu, Chia-Ling and Wang, Wen-Chung and Chen, Shu-Ying} } @inbook {2274, title = {Adaptives Testen [Adaptive testing].}, booktitle = {Testtheorie und Fragebogenkonstruktion}, year = {2012}, publisher = {Heidelberg: Springer}, organization = {Heidelberg: Springer}, address = {Berlin}, author = {Frey, A.} } @article {2212, title = {Balancing Flexible Constraints and Measurement Precision in Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {72}, number = {4}, year = {2012}, pages = {629-648}, abstract = {

Managing test specifications—both multiple nonstatistical constraints and flexibly defined constraints—has become an important part of designing item selection procedures for computerized adaptive tests (CATs) in achievement testing. This study compared the effectiveness of three procedures: constrained CAT, flexible modified constrained CAT, and the weighted penalty model in balancing multiple flexible constraints and maximizing measurement precision in a fixed-length CAT. The study also addressed the effect of two different test lengths—25 items and 50 items—and of including or excluding the randomesque item exposure control procedure with the three methods, all of which were found effective in selecting items that met flexible test constraints when used in the item selection process for longer tests. When the randomesque method was included to control for item exposure, the weighted penalty model and the flexible modified constrained CAT models performed better than did the constrained CAT procedure in maintaining measurement precision. When no item exposure control method was used in the item selection process, no practical difference was found in the measurement precision of each balancing method.

}, doi = {10.1177/0013164411431838}, url = {http://epm.sagepub.com/content/72/4/629.abstract}, author = {Moyer, Eric L. and Galindo, Jennifer L. and Dodd, Barbara G.} } @article {2047, title = {Comparison Between Dichotomous and Polytomous Scoring of Innovative Items in a Large-Scale Computerized Adaptive Test}, journal = {Educational and Psychological Measurement}, volume = {72}, year = {2012}, pages = {493-509}, abstract = {

This study explored the impact of partial credit scoring of one type of innovative items (multiple-response items) in a computerized adaptive version of a large-scale licensure pretest and operational test settings. The impacts of partial credit scoring on the estimation of the ability parameters and classification decisions in operational test settings were explored in one real data analysis and two simulation studies when two different polytomous scoring algorithms, automated polytomous scoring and rater-generated polytomous scoring, were applied. For the real data analyses, the ability estimates from dichotomous and polytomous scoring were highly correlated; the classification consistency between different scoring algorithms was nearly perfect. Information distribution changed slightly in the operational item bank. In the two simulation studies comparing each polytomous scoring with dichotomous scoring, the ability estimates resulting from polytomous scoring had slightly higher measurement precision than those resulting from dichotomous scoring. The practical impact related to classification decision was minor because of the extremely small number of items that could be scored polytomously in this current study.

}, doi = {10.1177/0013164411422903}, author = {Jiao, H. and Liu, J. and Haynie, K. and Woo, A. and Gorham, J.} } @article {2193, title = {Comparison of Exposure Controls, Item Pool Characteristics, and Population Distributions for CAT Using the Partial Credit Model}, journal = {Educational and Psychological Measurement}, volume = {72}, number = {1}, year = {2012}, pages = {159-175}, abstract = {

This study investigated item exposure control procedures under various combinations of item pool characteristics and ability distributions in computerized adaptive testing based on the partial credit model. Three variables were manipulated: item pool characteristics (120 items for each of easy, medium, and hard item pools), two ability distributions (normally distributed and negatively skewed data), and three exposure control procedures (randomesque procedure, progressive–restricted procedure, and maximum information procedure). A number of measurement precision indexes such as descriptive statistics, correlations between known and estimated ability levels, bias, root mean squared error, and average absolute difference, exposure rates, item usage, and item overlap were computed to assess the impact of matched or nonmatched item pool and ability distributions on the accuracy of ability estimation and the performance of exposure control procedures. As expected, the medium item pool produced better precision of measurement than both the easy and hard item pools. The progressive–restricted procedure performed better in terms of maximum exposure rates, item average overlap, and pool utilization than both the randomesque procedure and the maximum information procedure. The easy item pool with the negatively skewed data as a mismatched condition produced the worst performance.

}, doi = {10.1177/0013164411411296}, url = {http://epm.sagepub.com/content/72/1/159.abstract}, author = {Lee, HwaYoung and Dodd, Barbara G.} } @article {2305, title = {Comparison of two Bayesian methods to detect mode effects between paper-based and computerized adaptive assessments: a preliminary Monte Carlo study.}, journal = {BMC Med Res Methodol}, volume = {12}, year = {2012}, month = {2012}, pages = {124}, abstract = {

BACKGROUND: Computerized adaptive testing (CAT) is being applied to health outcome measures developed as paper-and-pencil (P\&P) instruments. Differences in how respondents answer items administered by CAT vs. P\&P can increase error in CAT-estimated measures if not identified and corrected.

METHOD: Two methods for detecting item-level mode effects are proposed using Bayesian estimation of posterior distributions of item parameters: (1) a modified robust Z (RZ) test, and (2) 95\% credible intervals (CrI) for the CAT-P\&P difference in item difficulty. A simulation study was conducted under the following conditions: (1) data-generating model (one- vs. two-parameter IRT model); (2) moderate vs. large DIF sizes; (3) percentage of DIF items (10\% vs. 30\%), and (4) mean difference in θ estimates across modes of 0 vs. 1 logits. This resulted in a total of 16 conditions with 10 generated datasets per condition.

RESULTS: Both methods evidenced good to excellent false positive control, with RZ providing better control of false positives and with slightly higher power for CrI, irrespective of measurement model. False positives increased when items were very easy to endorse and when there were mode differences in mean trait level. True positives were predicted by CAT item usage, absolute item difficulty and item discrimination. RZ outperformed CrI, due to better control of false positive DIF.

CONCLUSIONS: Whereas false positives were well controlled, particularly for RZ, power to detect DIF was suboptimal. Research is needed to examine the robustness of these methods under varying prior assumptions concerning the distribution of item and person parameters and when data fail to conform to prior assumptions. False identification of DIF when items were very easy to endorse is a problem warranting additional investigation.

}, keywords = {Bayes Theorem, Data Interpretation, Statistical, Humans, Mathematical Computing, Monte Carlo Method, Outcome Assessment (Health Care)}, issn = {1471-2288}, doi = {10.1186/1471-2288-12-124}, author = {Riley, Barth B and Carle, Adam C} } @article {1996, title = {Computerized Adaptive Testing for Student Selection to Higher Education}, journal = {Journal of Higher Education}, year = {2012}, chapter = {1}, abstract = {

The purpose of the present study is to discuss the applicability of the computerized adaptive testing format as an alternative to the current student selection examinations for higher education in Turkey. In the study, the problems associated with the current student selection system are given first. These problems exert pressure on students that results in test anxiety, produce measurement experiences that can be criticized, and lessen the credibility of the student selection system. Next, computerized adaptive tests are introduced and the advantages they provide are presented. Then the results of a study that used two research designs (simulation and live testing) are presented. Results revealed that (i) the computerized adaptive format provided a reduction of up to 80\% in the number of items given to students compared to the paper and pencil format of the student selection examination, and (ii) ability estimations had high reliabilities. Correlations between ability estimations obtained from the simulation and the traditional format were higher than 0.80. At the end of the study, the solutions that a computerized adaptive testing implementation provides for the current problems are discussed. Some issues for the application of the CAT format to student selection examinations in Turkey are also given.

}, author = {Kalender, I.} } @mastersthesis {2144, title = {Computerized adaptive testing in industrial and organizational psychology}, volume = {Ph.D.}, year = {2012}, school = {University of Twente}, type = {Ph.D. Dissertation}, address = {Twente, The Netherlands}, author = {Makransky, G.} } @article {2250, title = {Computerized Adaptive Testing Using a Class of High-Order Item Response Theory Models}, journal = {Applied Psychological Measurement}, volume = {36}, number = {8}, year = {2012}, pages = {689-706}, abstract = {

In the human sciences, a common assumption is that latent traits have a hierarchical structure. Higher order item response theory models have been developed to account for this hierarchy. In this study, computerized adaptive testing (CAT) algorithms based on these kinds of models were implemented, and their performance under a variety of situations was examined using simulations. The results showed that the CAT algorithms were very effective. The progressive method for item selection, the Sympson and Hetter method with online and freeze procedure for item exposure control, and the multinomial model for content balancing can simultaneously maintain good measurement precision, item exposure control, content balance, test security, and pool usage.

}, doi = {10.1177/0146621612459552}, url = {http://apm.sagepub.com/content/36/8/689.abstract}, author = {Huang, Hung-Yu and Chen, Po-Hsi and Wang, Wen-Chung} } @article {2205, title = {Detecting Local Item Dependence in Polytomous Adaptive Data}, journal = {Journal of Educational Measurement}, volume = {49}, number = {2}, year = {2012}, pages = {127{\textendash}147}, abstract = {

A rapidly expanding arena for item response theory (IRT) is in attitudinal and health-outcomes survey applications, often with polytomous items. In particular, there is interest in computer adaptive testing (CAT). Meeting model assumptions is necessary to realize the benefits of IRT in this setting, however. Although initial investigations of local item dependence have been studied both for polytomous items in fixed-form settings and for dichotomous items in CAT settings, there have been no publications applying local item dependence detection methodology to polytomous items in CAT despite its central importance to these applications. The current research uses a simulation study to investigate the extension of widely used pairwise statistics, Yen’s Q3 Statistic and Pearson’s Statistic X2, in this context. The simulation design and results are contextualized throughout with a real item bank of this type from the Patient-Reported Outcomes Measurement Information System (PROMIS).

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2012.00165.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2012.00165.x}, author = {Mislevy, Jessica L. and Rupp, Andr{\'e} A. and Harring, Jeffrey R.} } @article {2263, title = {Development of a computerized adaptive test for depression}, journal = {Archives of General Psychiatry}, volume = {69}, year = {2012}, pages = {1105-1112}, doi = {10.1001/archgenpsychiatry.2012.14}, url = {WWW.ARCHGENPSYCHIATRY.COM}, author = {Robert D. Gibbons and David .J. Weiss and Paul A. Pilkonis and Ellen Frank and Tara Moore and Jong Bae Kim and David J. Kupfer} } @article {2249, title = {An Efficiency Balanced Information Criterion for Item Selection in Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {49}, number = {3}, year = {2012}, pages = {225{\textendash}246}, abstract = {

Successful administration of computerized adaptive testing (CAT) programs in educational settings requires that test security and item exposure control issues be taken seriously. Developing an item selection algorithm that strikes the right balance between test precision and level of item pool utilization is the key to successful implementation and long-term quality control of CAT. This study proposed a new item selection method using the “efficiency balanced information” criterion to address issues with the maximum Fisher information method and stratification methods. According to the simulation results, the new efficiency balanced information method had desirable advantages over the other studied item selection methods in terms of improving the optimality of CAT assembly and utilizing items with low a-values while eliminating the need for item pool stratification.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2012.00173.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2012.00173.x}, author = {Han, Kyung T.} } @article {2166, title = {An Empirical Evaluation of the Slip Correction in the Four Parameter Logistic Models With Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {36}, number = {2}, year = {2012}, pages = {75-87}, abstract = {

In a selected response test, aberrant responses such as careless errors and lucky guesses might cause error in ability estimation because these responses do not actually reflect the knowledge that examinees possess. In a computerized adaptive test (CAT), these aberrant responses could further cause serious estimation error due to dynamic item administration. To enhance the robust performance of CAT against aberrant responses, Barton and Lord proposed the four-parameter logistic (4PL) item response theory (IRT) model. However, most studies relevant to the 4PL IRT model were conducted based on simulation experiments. This study attempts to investigate the performance of the 4PL IRT model as a slip-correction mechanism with an empirical experiment. The results showed that the 4PL IRT model could not only reduce the problematic underestimation of the examinees’ ability introduced by careless mistakes in practical situations but also improve measurement efficiency.

}, doi = {10.1177/0146621611432862}, url = {http://apm.sagepub.com/content/36/2/75.abstract}, author = {Yen, Yung-Chin and Ho, Rong-Guey and Laio, Wen-Wei and Chen, Li-Ju and Kuo, Ching-Chin} } @article {2044, title = {Improving personality facet scores with multidimensional computerized adaptive testing: An illustration with the NEO PI-R}, journal = {Assessment}, year = {2012}, doi = {10.1177/1073191112437756}, author = {Makransky, G. and Mortensen, E. L. and Glas, C. A. W.} } @article {2277, title = {Investigating the Effect of Item Position in Computer-Based Tests}, journal = {Journal of Educational Measurement}, volume = {49}, number = {4}, year = {2012}, pages = {362{\textendash}379}, abstract = {

Computer-based tests (CBTs) often use random ordering of items in order to minimize item exposure and reduce the potential for answer copying. Little research has been done, however, to examine item position effects for these tests. In this study, different versions of a Rasch model and different response time models were examined and applied to data from a CBT administration of a medical licensure examination. The models specifically were used to investigate whether item position affected item difficulty and item intensity estimates. Results indicated that the position effect was negligible.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2012.00181.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2012.00181.x}, author = {Li, Feiming and Cohen, Allan and Shen, Linjun} } @article {2113, title = {Item Overexposure in Computerized Classification Tests Using Sequential Item Selection}, journal = {Practical Assessment, Research \& Evaluation}, volume = {17}, year = {2012}, abstract = {

Computerized classification tests (CCTs) often use sequential item selection which administers items according to maximizing psychometric information at a cut point demarcating passing and failing scores. This paper illustrates why this method of item selection leads to the overexposure of a significant number of items, and the performances of three different methods for controlling maximum item exposure rates in CCTs are compared. Specifically, the Sympson-Hetter, restricted, and item eligibility methods are examined in two studies realistically simulating different types of CCTs and are evaluated based upon criteria including classification accuracy, the number of items exceeding the desired maximum exposure rate, and test overlap. The pros and cons of each method are discussed from a practical perspective.

}, author = {Huebner, A.} } @article {2279, title = {Item Selection and Ability Estimation Procedures for a Mixed-Format Adaptive Test}, journal = {Applied Measurement in Education}, volume = {25}, number = {4}, year = {2012}, pages = {305-326}, doi = {10.1080/08957347.2012.714686}, url = {http://www.tandfonline.com/doi/abs/10.1080/08957347.2012.714686}, author = {Ho, Tsung-Han and Dodd, Barbara G.} } @article {2210, title = {A Mixture Rasch Model{\textendash}Based Computerized Adaptive Test for Latent Class Identification}, journal = {Applied Psychological Measurement}, volume = {36}, number = {6}, year = {2012}, pages = {469-493}, abstract = {

This study explored a computerized adaptive test delivery algorithm for latent class identification based on the mixture Rasch model. Four item selection methods based on the Kullback–Leibler (KL) information were proposed and compared with the reversed and the adaptive KL information under simulated testing conditions. When item separation was large, all item selection methods did not differ evidently in terms of accuracy in classifying examinees into different latent classes and estimating latent ability. However, when item separation was small, two methods with class-specific ability estimates performed better than the other two methods based on a single latent ability estimate across all latent classes. The three types of KL information distributions were compared. The KL and the reversed KL information could be the same or different depending on the ability level and the item difficulty difference between latent classes. Although the KL information and the reversed KL information were different at some ability levels and item difficulty difference levels, the use of the KL, the reversed KL, or the adaptive KL information did not affect the results substantially due to the symmetric distribution of item difficulty differences between latent classes in the simulated item pools. Item pool usage and classification convergence points were examined as well.

}, doi = {10.1177/0146621612450068}, url = {http://apm.sagepub.com/content/36/6/469.abstract}, author = {Jiao, Hong and Macready, George and Liu, Junhui and Cho, Youngmi} } @article {2230, title = {Multistage Computerized Adaptive Testing With Uniform Item Exposure}, journal = {Applied Measurement in Education}, volume = {25}, number = {2}, year = {2012}, pages = {118-141}, doi = {10.1080/08957347.2012.660363}, url = {http://www.tandfonline.com/doi/abs/10.1080/08957347.2012.660363}, author = {Edwards, Michael C. and Flora, David B. and Thissen, David} } @article {2211, title = {Panel Design Variations in the Multistage Test Using the Mixed-Format Tests}, journal = {Educational and Psychological Measurement}, volume = {72}, number = {4}, year = {2012}, pages = {574-588}, abstract = {

This study compared various panel designs of the multistage test (MST) using mixed-format tests in the context of classification testing. Simulations varied the design of the first-stage module. The first stage was constructed according to three levels of test information functions (TIFs) with three different TIF centers. Additional computerized adaptive test (CAT) conditions provided baseline comparisons. Three passing rate conditions were also included. The various MST conditions using mixed-format tests were constructed properly and performed well. When the levels of TIFs at the first stage were higher, the simulations produced a greater number of correct classifications. CAT with the randomesque-10 procedure yielded comparable results to the MST with increased levels of TIFs. Finally, all MST conditions achieved better test security results compared with CAT’s maximum information conditions.

}, doi = {10.1177/0013164411428977}, url = {http://epm.sagepub.com/content/72/4/574.abstract}, author = {Kim, Jiseon and Chung, Hyewon and Dodd, Barbara G. and Park, Ryoungsun} } @article {2168, title = {The Problem of Bias in Person Parameter Estimation in Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {36}, number = {4}, year = {2012}, pages = {255-270}, abstract = {

It is shown that deviations of estimated from true values of item difficulty parameters, caused for example by item calibration errors, the neglect of randomness of item difficulty parameters, testlet effects, or rule-based item generation, can lead to systematic bias in point estimation of person parameters in the context of adaptive testing. This effect occurs even when the errors of the item difficulty parameters are themselves unbiased. Analytical calculations as well as simulation studies are discussed.

}, doi = {10.1177/0146621612443304}, url = {http://apm.sagepub.com/content/36/4/255.abstract}, author = {Doebler, Anna} } @article {2194, title = {On the Reliability and Validity of a Numerical Reasoning Speed Dimension Derived From Response Times Collected in Computerized Testing}, journal = {Educational and Psychological Measurement}, volume = {72}, number = {2}, year = {2012}, pages = {245-263}, abstract = {

Data from 181 college students were used to assess whether math reasoning item response times in computerized testing can provide valid and reliable measures of a speed dimension. The alternate forms reliability of the speed dimension was .85. A two-dimensional structural equation model suggests that the speed dimension is related to the accuracy of speeded responses. Speed factor scores were significantly correlated with performance on the ACT math scale. Results suggest that the speed dimension underlying response times can be reliably measured and that the dimension is related to the accuracy of performance under the pressure of time limits.

}, doi = {10.1177/0013164411408412}, url = {http://epm.sagepub.com/content/72/2/245.abstract}, author = {Davison, Mark L. and Semmes, Robert and Huang, Lan and Close, Catherine N.} } @article {2167, title = {A Stochastic Method for Balancing Item Exposure Rates in Computerized Classification Tests}, journal = {Applied Psychological Measurement}, volume = {36}, number = {3}, year = {2012}, pages = {181-188}, abstract = {

Computerized classification tests (CCTs) classify examinees into categories such as pass/fail, master/nonmaster, and so on. This article proposes the use of stochastic methods from sequential analysis to address item overexposure, a practical concern in operational CCTs. Item overexposure is traditionally dealt with in CCTs by the Sympson-Hetter (SH) method, but this method is unable to restrict the exposure of the most informative items to the desired level. The authors’ new method of stochastic item exposure balance (SIEB) works in conjunction with the SH method and is shown to greatly reduce the number of overexposed items in a pool and improve overall exposure balance while maintaining classification accuracy comparable with using the SH method alone. The method is demonstrated using a simulation study.

}, doi = {10.1177/0146621612439932}, url = {http://apm.sagepub.com/content/36/3/181.abstract}, author = {Huebner, Alan and Li, Zhushan} } @article {2280, title = {Termination Criteria in Computerized Adaptive Tests: Do Variable-Length CATs Provide Efficient and Effective Measurement?}, journal = {Journal of Computerized Adaptive Testing}, volume = {1}, year = {2012}, pages = {1-18}, issn = {2165-6592}, doi = {10.7333/1212-0101001}, author = {Babcock, B. and Weiss, D. J.} } @conference {2080, title = {Adaptive Item Calibration and Norming: Unique Considerations of a Global Deployment}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, keywords = {CAT, common item equating, Figural Reasoning Test, item calibration, norming}, author = {Alexander Schwall and Evan Sinar} } @article {365, title = {Applying computerized adaptive testing to the CES-D scale: A simulation study}, journal = {Psychiatry Research}, year = {2011}, note = {Psychiatry Res. 2011 Jan 3.}, month = {Jan 3}, edition = {2011/01/07}, abstract = {In this paper we studied the appropriateness of developing an adaptive version of the Center of Epidemiological Studies-Depression (CES-D, Radloff, 1977) scale. Computerized Adaptive Testing (CAT) involves the computerized administration of a test in which each item is dynamically selected from a pool of items until a pre-specified measurement precision is reached. Two types of analyses were performed using the CES-D responses of a large sample of adolescents (N=1392). First, it was shown that the items met the psychometric requirements needed for CAT. Second, CATs were simulated by using the existing item responses as if they had been collected adaptively. CATs selecting only a small number of items gave results which, in terms of depression measurement and criterion validity, were only marginally different from the results of full CES-D assessment. It was concluded that CAT is a very fruitful way of improving the efficiency of the CES-D questionnaire. The discussion addresses the strengths and limitations of the application of CAT in mental health research.}, isbn = {0165-1781 (Print)0165-1781 (Linking)}, issn = {21208660}, author = {Smits, N. and Cuijpers, P. and van Straten, A.} } @article {2295, title = {Applying computerized adaptive testing to the CES-D scale: A simulation study}, journal = {Psychiatry Research}, volume = {188}, year = {2011}, pages = {147{\textendash}155}, author = {Smits, N. and Cuijpers, P. and van Straten, A.} } @article {1455, title = {Better Data From Better Measurements Using Computerized Adaptive Testing}, journal = {Journal of Methods and Measurement in the Social Sciences}, volume = {Vol. 2}, number = {1}, year = {2011}, pages = {1-27}, edition = {No. 1}, abstract = {The process of constructing a fixed-length conventional test frequently focuses on maximizing internal consistency reliability by selecting test items that are of average difficulty and high discrimination (a --peaked{\textbardbl} test). The effect of constructing such a test, when viewed from the perspective of item response theory, is test scores that are precise for examinees whose trait levels are near the point at which the test is peaked; as examinee trait levels deviate from the mean, the precision of their scores decreases substantially. 
Results of a small simulation study demonstrate that when peaked tests are “off target” for an examinee, their scores are biased and have spuriously high standard deviations, reflecting substantial amounts of error. These errors can reduce the correlations of these kinds of scores with other variables and adversely affect the results of standard statistical tests. By contrast, scores from adaptive tests are essentially unbiased and have standard deviations that are much closer to true values. Basic concepts of adaptive testing are introduced and fully adaptive computerized tests (CATs) based on IRT are described. Several examples of response records from CATs are discussed to illustrate how CATs function. Some operational issues, including item exposure, content balancing, and enemy items are also briefly discussed. It is concluded that because CAT constructs a unique test for each examinee, scores from CATs will be more precise and should provide better data for social science research and applications.}, author = {Weiss, D. J.} } @conference {2077, title = {Building Affordable CD-CAT Systems for Schools To Address Today{\textquoteright}s Challenges In Assessment}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, keywords = {affordability, CAT, cost}, author = {Chang, Hua-Hua} } @article {2070, title = {catR: An R Package for Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, year = {2011}, abstract = {

Computerized adaptive testing (CAT) is an active current research field in psychometrics and educational measurement. However, there is very little software available to handle such adaptive tasks. The R package catR was developed to perform adaptive testing with as much flexibility as possible, in an attempt to provide a developmental and testing platform to the interested user. Several item-selection rules and ability estimators are implemented. The item bank can be provided by the user or randomly generated from parent distributions of item parameters. Three stopping rules are available. The output can be graphically displayed.

}, keywords = {computer program, computerized adaptive testing, Estimation, Item Response Theory}, doi = {10.1177/0146621611407482}, author = {Magis, D. and Ra{\^\i}che, G.} } @article {2165, title = {A Comment on Early Student Blunders on Computer-Based Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {35}, number = {2}, year = {2011}, pages = {165-174}, abstract = {

This article refutes a recent claim that computer-based tests produce biased scores for very proficient test takers who make mistakes on one or two initial items and that the “bias” can be reduced by using a four-parameter IRT model. Because the same effect occurs with pattern scores on nonadaptive tests, the effect results from IRT scoring, not from adaptive testing. Because very proficient test takers rarely err on items of middle difficulty, the so-called bias is one of selective data analysis. Furthermore, the apparently large score penalty for one error on an otherwise perfect response pattern is shown to result from the relative stretching of the IRT scale at very high and very low proficiencies. The recommended use of a four-parameter IRT model is shown to have drawbacks.

}, doi = {10.1177/0146621610377080}, url = {http://apm.sagepub.com/content/35/2/165.abstract}, author = {Green, Bert F.} } @article {2040, title = {Computer adaptive testing for small scale programs and instructional systems}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

This study investigates measurement decision theory (MDT) as an underlying model for computer adaptive testing when the goal is to classify examinees into one of a finite number of groups. The first analysis compares MDT with a popular item response theory model and finds little difference in terms of the percentage of correct classifications. The second analysis examines the number of examinees needed to calibrate MDT item parameters and finds accurate classifications even with calibration sample sizes as small as 100 examinees.

}, author = {Rudner, L. M. and Guo, F.} } @article {361, title = {Computerized adaptive assessment of personality disorder: Introducing the CAT{\textendash}PD project}, journal = {Journal of Personality Assessment}, volume = {93}, number = {4}, year = {2011}, pages = {380-389}, abstract = {Assessment of personality disorders (PD) has been hindered by reliance on the problematic categorical model embodied in the most recent Diagnostic and Statistical Model of Mental Disorders (DSM), lack of consensus among alternative dimensional models, and inefficient measurement methods. This article describes the rationale for and early results from a multiyear study funded by the National Institute of Mental Health that was designed to develop an integrative and comprehensive model and efficient measure of PD trait dimensions. To accomplish these goals, we are in the midst of a 5-phase project to develop and validate the model and measure. The results of Phase 1 of the project{\textemdash}which was focused on developing the PD traits to be assessed and the initial item pool{\textemdash}resulted in a candidate list of 59 PD traits and an initial item pool of 2,589 items. Data collection and structural analyses in community and patient samples will inform the ultimate structure of the measure, and computerized adaptive testing will permit efficient measurement of the resultant traits. The resultant Computerized Adaptive Test of Personality Disorder (CAT{\textendash}PD) will be well positioned as a measure of the proposed DSM{\textendash}5 PD traits. Implications for both applied and basic personality research are discussed.}, isbn = {0022-3891}, author = {Simms, L. J. and Goldberg, L .R. and Roberts, J. E. and Watson, D. and Welte, J. and Rotterman, J. H.} } @article {2236, title = {Computerized Adaptive Testing with the Zinnes and Griggs Pairwise Preference Ideal Point Model}, journal = {International Journal of Testing}, volume = {11}, number = {3}, year = {2011}, pages = {231-247}, doi = {10.1080/15305058.2011.561459}, url = {http://www.tandfonline.com/doi/abs/10.1080/15305058.2011.561459}, author = {Stark, Stephen and Chernyshenko, Oleksandr S.} } @article {2190, title = {Computerized Classification Testing Under the Generalized Graded Unfolding Model}, journal = {Educational and Psychological Measurement}, volume = {71}, number = {1}, year = {2011}, pages = {114-128}, abstract = {

The generalized graded unfolding model (GGUM) has been recently developed to describe item responses to Likert items (agree—disagree) in attitude measurement. In this study, the authors (a) developed two item selection methods in computerized classification testing under the GGUM, the current estimate/ability confidence interval method and the cut score/sequential probability ratio test method, and (b) evaluated their accuracy and efficiency in classification through simulations. The results indicated that both methods were very accurate and efficient. The more points each item had and the fewer the classification categories, the more accurate and efficient the classification would be. However, the latter method may yield a very low accuracy in dichotomous items with a short maximum test length. Thus, if it is to be used to classify examinees with dichotomous items, the maximum test length should be increased.

}, doi = {10.1177/0013164410391575}, url = {http://epm.sagepub.com/content/71/1/114.abstract}, author = {Wang, Wen-Chung and Liu, Chen-Wei} } @article {2191, title = {Computerized Classification Testing Under the One-Parameter Logistic Response Model With Ability-Based Guessing}, journal = {Educational and Psychological Measurement}, volume = {71}, number = {6}, year = {2011}, pages = {925-941}, abstract = {

The one-parameter logistic model with ability-based guessing (1PL-AG) has been recently developed to account for the effect of ability on guessing behavior in multiple-choice items. In this study, the authors developed algorithms for computerized classification testing under the 1PL-AG and conducted a series of simulations to evaluate their performances. Four item selection methods (the Fisher information, the Fisher information with a posterior distribution, the progressive method, and the adjusted progressive method) and two termination criteria (the ability confidence interval [ACI] method and the sequential probability ratio test [SPRT]) were developed. In addition, the Sympson–Hetter online method with freeze (SHOF) was implemented for item exposure control. Major results include the following: (a) when no item exposure control was made, all the four item selection methods yielded very similar correct classification rates, but the Fisher information method had the worst item bank usage and the highest item exposure rate; (b) SHOF can successfully maintain the item exposure rate at a prespecified level, without compromising substantial accuracy and efficiency in classification; (c) once SHOF was implemented, all the four methods performed almost identically; (d) ACI appeared to be slightly more efficient than SPRT; and (e) in general, a higher weight of ability in guessing led to a slightly higher accuracy and efficiency, and a lower forced classification rate.

}, doi = {10.1177/0013164410392372}, url = {http://epm.sagepub.com/content/71/6/925.abstract}, author = {Wang, Wen-Chung and Huang, Sheng-Yun} } @article {283, title = {Content range and precision of a computer adaptive test of upper extremity function for children with cerebral palsy}, journal = {Physical \& Occupational Therapy in Pediatrics}, volume = {31}, number = {1}, year = {2011}, note = {Montpetit, KathleenHaley, StephenBilodeau, NathalieNi, PengshengTian, FengGorton, George 3rdMulcahey, M JEnglandPhys Occup Ther Pediatr. 2011 Feb;31(1):90-102. Epub 2010 Oct 13.}, pages = {90-102}, edition = {2010/10/15}, abstract = {This article reports on the content range and measurement precision of an upper extremity (UE) computer adaptive testing (CAT) platform of physical function in children with cerebral palsy. Upper extremity items representing skills of all abilities were administered to 305 parents. These responses were compared with two traditional standardized measures: Pediatric Outcomes Data Collection Instrument and Functional Independence Measure for Children. The UE CAT correlated strongly with the upper extremity component of these measures and had greater precision when describing individual functional ability. The UE item bank has wider range with items populating the lower end of the ability spectrum. This new UE item bank and CAT have the capability to quickly assess children of all ages and abilities with good precision and, most importantly, with items that are meaningful and appropriate for their age and level of physical function.}, isbn = {1541-3144 (Electronic)0194-2638 (Linking)}, author = {Montpetit, K. and Haley, S. and Bilodeau, N. and Ni, P. and Tian, F. and Gorton, G., 3rd and Mulcahey, M. J.} } @conference {2100, title = {Continuous Testing (an avenue for CAT research)}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Publishing an Adaptive Test

Problems with Publishing

Research Questions

}, keywords = {CAT, item filter, item filtration}, author = {G. Gage Kingsbury} } @article {2038, title = {Creating a K-12 Adaptive Test: Examining the Stability of Item Parameter Estimates and Measurement Scales}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

Development of adaptive tests used in K-12 settings requires the creation of stable measurement scales to measure the growth of individual students from one grade to the next, and to measure change in groups from one year to the next. Accountability systems
like No Child Left Behind require stable measurement scales so that accountability has meaning across time. This study examined the stability of the measurement scales used with the Measures of Academic Progress. Difficulty estimates for test questions from the reading and mathematics scales were examined over a period ranging from 7 to 22 years. Results showed high correlations between item difficulty estimates from the time at which they were originally calibrated and the current calibration. The average drift in item difficulty estimates was less than .01 standard deviations. The average impact of change in item difficulty estimates was less than the smallest reported difference on the score scale for two actual tests. The findings of the study indicate that an IRT scale can be stable enough to allow consistent measurement of student achievement.

}, url = {http://www.testpublishers.org/journal-of-applied-testing-technology}, author = {Kingsbury, G. G. and Wise, S. L.} } @booklet {154, title = {Cross-cultural development of an item list for computer-adaptive testing of fatigue in oncological patients}, journal = {Health and Quality of Life Outcomes}, volume = {9}, number = {1}, year = {2011}, note = {Health Qual Life Outcomes. 2011 Mar 29;9(1):19.}, month = {March 29, 2011}, pages = {10}, edition = {2011/03/31}, abstract = {ABSTRACT: INTRODUCTION: Within an ongoing project of the EORTC Quality of Life Group, we are developing computerized adaptive test (CAT) measures for the QLQ-C30 scales. These new CAT measures are conceptualised to reflect the same constructs as the QLQ-C30 scales. Accordingly, the Fatigue-CAT is intended to capture physical and general fatigue. METHODS: The EORTC approach to CAT development comprises four phases (literature search, operationalisation, pre-testing, and field testing). Phases I-III are described in detail in this paper. A literature search for fatigue items was performed in major medical databases. After refinement through several expert panels, the remaining items were used as the basis for adapting items and/or formulating new items fitting the EORTC item style. To obtain feedback from patients with cancer, these English items were translated into Danish, French, German, and Spanish and tested in the respective countries. RESULTS: Based on the literature search a list containing 588 items was generated. After a comprehensive item selection procedure focusing on content, redundancy, item clarity and item difficulty a list of 44 fatigue items was generated. Patient interviews (n=52) resulted in 12 revisions of wording and translations. DISCUSSION: The item list developed in phases I-III will be further investigated within a field-testing phase (IV) to examine psychometric characteristics and to fit an item response theory model. The Fatigue CAT based on this item bank will provide scores that are backward-compatible to the original QLQ-C30 fatigue scale.}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Giesinger, J. M. and Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Arraras, J. I. and Conroy, T. and Gamper, E. M. and Kemmler, G. and King, M. T. and Oberguggenberger, A. S. and Velikova, G. and Young, T. and Holzner, B. and Eortc-Qlg, E. O.} } @article {2042, title = {Design of a Computer-Adaptive Test to Measure English Literacy and Numeracy in the Singapore Workforce: Considerations, Benefits, and Implications}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

A computer adaptive test (CAT) is a delivery methodology that serves the larger goals of the assessment system in which it is embedded. A thorough analysis of the assessment system for which a CAT is being designed is critical to ensure that the delivery platform is appropriate and addresses all relevant complexities. As such, a CAT engine must be designed to support the
validity and reliability of the overall system. This design takes the form of adherence to the assessment goals and objectives of the adaptive assessment system. When the assessment is adapted for use in another country, consideration must be given to any necessary revisions including content differences. This article addresses these considerations while drawing, in part, on the process followed in the development of the CAT delivery system designed to test English language workplace skills for the Singapore Workforce Development Agency. Topics include item creation and selection, calibration of the item pool, analysis and testing of the psychometric properties, and reporting and interpretation of scores. The characteristics and benefits of the CAT delivery system are detailed as well as implications for testing programs considering the use of a
CAT delivery system.

}, url = {http://www.testpublishers.org/journal-of-applied-testing-technology}, author = {Jacobsen, J. and Ackermann, R. and Eg{\"u}ez, J. and Ganguli, D. and Rickard, P. and Taylor, L.} } @conference {2083, title = {Detecting DIF between Conventional and Computerized Adaptive Testing: A Monte Carlo Study}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Two procedures, Modified Robust Z and 95\% Credible Interval, were compared in a Monte Carlo study. Both procedures evidenced adequate control of false positive DIF results.

}, keywords = {95\% Credible Interval, CAT, DIF, differential item function, modified robust Z statistic, Monte Carlo methodologies}, author = {Barth B. Riley and Adam C. Carle} } @mastersthesis {1997, title = {Effects of Different Computerized Adaptive Testing Strategies of Recovery of Ability}, volume = {Ph.D.}, year = {2011}, abstract = {

The purpose of the present study was to compare ability estimates obtained from a computerized adaptive testing (CAT) procedure with the paper-and-pencil administration results of the Student Selection Examination (SSE) science subtest, considering different ability estimation methods and test termination rules. The study had two phases. In the first phase, a post-hoc simulation was conducted to examine the relationships between examinee ability levels estimated by the CAT and paper-and-pencil versions of the SSE. Maximum Likelihood Estimation and Expected A Posteriori were used as ability estimation methods, and the test termination rules were a standard error threshold and a fixed number of items. In the second phase, a live CAT was administered to a group of examinees to investigate the performance of CAT administration outside a simulated environment. Findings of the post-hoc simulations indicated that CAT could be implemented for the SSE using the Expected A Posteriori estimation method with a standard error threshold of 0.30 or higher. The correlation between ability estimates obtained from the CAT and the real SSE was 0.95, and the mean number of items administered by the CAT was 18.4. The correlation between live CAT and real SSE ability estimates was 0.74, and the CAT administration used approximately 50\% of the items in the paper-and-pencil SSE science subtest. Results indicated that a CAT version of the SSE science subtest provided more reliable ability estimates with fewer items than the paper-and-pencil format.

}, author = {Kalender, I.} } @article {390, title = {A framework for the development of computerized adaptive tests}, journal = {Practical Assessment Research \& Evaluation}, volume = {16}, number = {1}, year = {2011}, publisher = {Practical Assessment Research \& Evaluation}, abstract = {A substantial amount of research has been conducted over the past 40 years on technical aspects of computerized adaptive testing (CAT), such as item selection algorithms, item exposure controls, and termination criteria. However, there is little literature providing practical guidance on the development of a CAT. This paper seeks to collate some of the available research methodologies into a general framework for the development of any CAT assessment. }, author = {Thompson, N. A. and Weiss, D. J.} } @conference {2099, title = {From Reliability to Validity: Expanding Adaptive Testing Practice to Find the Most Valid Score for Each Test Taker}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

CAT is an exception to the traditional conception of validity. It is one of the few examples of individualized testing: item difficulty is tailored to each examinee. The intent, however, is increased efficiency: the focus is on reliability (reduced standard error), equivalence with paper \& pencil tests is valued, and validity is enhanced through improved reliability.

How Else Might We Individualize Testing Using CAT?

An ISV-Based View of Validity

Test Event -- An examinee encounters a series of items in a particular context.

CAT Goal: individualize testing to address CIV threats to score validity (i.e., maximize ISV).

Some Research Issues:

}, keywords = {CAT, CIV, construct-irrelevant variance, Individual Score Validity, ISV, low test taking motivation, Reliability, validity}, author = {Steven L. Wise} } @conference {2079, title = {A Heuristic Of CAT Item Selection Procedure For Testlets}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, keywords = {CAT, shadow test, testlets}, author = {Yuehmei Chien and David Shin and Walter Denny Way} } @conference {2078, title = {High-throughput Health Status Measurement using CAT in the Era of Personal Genomics: Opportunities and Challenges}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, keywords = {CAT, health applications, PROMIS}, author = {Eswar Krishnan} } @article {2267, title = {Hypothetical use of multidimensional adaptive testing for the assessment of student achievement in PISA. }, journal = {Educational and Psychological Measurement}, volume = {71}, year = {2011}, pages = {503-522}, author = {Frey, A. and Seitz, N-N.} } @conference {2082, title = {Impact of Item Drift on Candidate Ability Estimation}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

For large operational pools, candidate ability estimates appear robust to item drift, especially under conditions that may represent 'normal' amounts of drift. Even with 'extreme' conditions of drift (e.g., 20\% of items drifting 1.00 logits), decision consistency was still high.

}, keywords = {item drift}, author = {Sarah Hagge and Ada Woo and Phil Dickison} } @booklet {313, title = {Item banks for measuring emotional distress from the Patient-Reported Outcomes Measurement Information System (PROMIS{\textregistered}): depression, anxiety, and anger}, journal = {Assessment}, year = {2011}, chapter = {June 21, 2011}, isbn = {1073-1911}, author = {Pilkonis, P. A. and Choi, S. W. and Reise, S. P. and Stover, A. M. and Riley, W. T. and Cella, D.} } @article {2188, title = {Item Selection Criteria With Practical Constraints for Computerized Classification Testing}, journal = {Educational and Psychological Measurement}, volume = {71}, number = {1}, year = {2011}, pages = {20-36}, abstract = {

This study compares four item selection criteria for two-category computerized classification testing: (1) Fisher information (FI), (2) Kullback-Leibler information (KLI), (3) weighted log-odds ratio (WLOR), and (4) mutual information (MI), with respect to the efficiency and accuracy of classification decisions using the sequential probability ratio test, as well as the extent of item usage. The comparability of the four item selection criteria is examined primarily under three types of item selection conditions: (1) using only the four item selection algorithms, (2) using the four item selection algorithms and content balancing control, and (3) using the four item selection algorithms, content balancing control, and item exposure control. The comparability of the four item selection criteria is also evaluated for two types of proficiency distributions and three levels of indifference region width. The results show that the differences among the four item selection criteria are washed out as more realistic constraints are imposed. Moreover, within two-category classification testing, the use of MI does not necessarily generate greater efficiency than FI, WLOR, and KLI, although MI might seem attractive because of the general form of its formula for item selection.

}, doi = {10.1177/0013164410387336}, url = {http://epm.sagepub.com/content/71/1/20.abstract}, author = {Lin, Chuan-Ju} } @conference {2106, title = {Item Selection Methods based on Multiple Objective Approaches for Classification of Respondents into Multiple Levels}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Is it possible to develop new item selection methods that take advantage of the fact that we want to classify into multiple categories? The new methods take multiple points on the ability scale into account and are based on multiple objective approaches.

Conclusions

}, keywords = {adaptive classification test, CAT, item selection, sequential classification test}, author = {Maaike van Groen and Theo Eggen and Bernard Veldkamp} } @article {2037, title = {JATT Special Issue on Adaptive Testing: Welcome and Overview}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, month = {05/2011}, url = {http://www.testpublishers.org/journal-of-applied-testing-technology}, author = {Thompson, N. A.} } @article {2039, title = {Measuring Individual Growth With Conventional and Adaptive Tests}, journal = {Journal of Methods and Measurement in the Social Sciences}, volume = {2}, year = {2011}, pages = {80-101}, author = {Weiss, D. J. and Von Minden, S.} } @conference {2090, title = {Moving beyond Efficiency to Allow CAT to Provide Better Diagnostic Information}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {
Future CATs will provide better diagnostic information to examinees; to regulators, educators, and employers; and to test developers. This goal will be accomplished by smart CATs that collect additional information during the test, and by psychomagic. The time is now for reporting.
}, keywords = {CAT, dianostic information, MIRT, Multiple unidimensional scales, psychomagic, smart CAT}, author = {Brian D. Bontempo} } @article {2041, title = {National Tests in Denmark {\textendash} CAT as a Pedagogic Tool}, journal = {Journal of Applied Testing Technology}, volume = {12}, year = {2011}, abstract = {

Testing and test results can be used in different ways. They can be used for regulation and control, but they can also be a pedagogic tool for assessing student proficiency in order to target teaching, improve learning and facilitate local pedagogical leadership. To serve these purposes the tests have to be low stakes, and to ensure this, the Danish national test results are made strictly confidential by law. The only test results that are made public are the overall national results. Because of the test design, test results are directly comparable, offering potential for monitoring added value and developing new ways of using test results in a pedagogical context. This article gives the background and status for the development of the Danish national tests, describes what is special about these tests (e.g., Information Technology [IT]-based, 3 tests in 1, adaptive), how the national tests are carried out, and what
is tested. Furthermore, it describes strategies for disseminating the results to the pupil, parents, teacher, headmaster and municipality; and how the results can be used by the teacher and headmaster.

}, author = {Wandall, J.} } @article {2246, title = {A new adaptive testing algorithm for shortening health literacy assessments}, journal = {BMC Medical Informatics and Decision Making}, volume = {11}, year = {2011}, abstract = {

http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3178473/?tool=pmcentrez
}, doi = {10.1186/1472-6947-11-52}, author = {Kandula, S. and Ancker, J.S. and Kaufman, D.R. and Currie, L.M. and Qing, Z.-T.} } @article {2189, title = {A New Stopping Rule for Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {71}, number = {1}, year = {2011}, pages = {37-53}, abstract = {

The goal of the current study was to introduce a new stopping rule for computerized adaptive testing (CAT). The predicted standard error reduction (PSER) stopping rule uses the predictive posterior variance to determine the reduction in standard error that would result from the administration of additional items. The performance of the PSER was compared with that of the minimum standard error stopping rule and a modified version of the minimum information stopping rule in a series of simulated adaptive tests, drawn from a number of item pools. Results indicate that the PSER makes efficient use of CAT item pools, administering fewer items when predictive gains in information are small and increasing measurement precision when information is abundant.

}, doi = {10.1177/0013164410387338}, url = {http://epm.sagepub.com/content/71/1/37.abstract}, author = {Choi, Seung W. and Grady, Matthew W. and Dodd, Barbara G.} } @conference {2108, title = {Optimal Calibration Designs for Computerized Adaptive Testing}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Optimization

How can we exploit the advantages of Balanced Block Design while keeping the logistics manageable?

Homogeneous Designs: Overlap between test booklets as regular as possible

Conclusions:

}, keywords = {balanced block design, CAT, item calibration, optimization, Rasch}, author = {Angela Verschoor} } @conference {2098, title = {A Paradigm for Multinational Adaptive Testing}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Impact of Issues in "Exported" Adaptive Testing

Goal is construct equivalency in the new environment

Research Questions

}, keywords = {CAT, multinational adaptive testing}, author = {A Zara} } @article {2192, title = {Polytomous Adaptive Classification Testing: Effects of Item Pool Size, Test Termination Criterion, and Number of Cutscores}, journal = {Educational and Psychological Measurement}, volume = {71}, number = {6}, year = {2011}, pages = {1006-1022}, abstract = {

Computer-adaptive classification tests focus on classifying respondents into different proficiency groups (e.g., for pass/fail decisions). To date, adaptive classification testing has been dominated by research on dichotomous response formats and classifications into two groups. This article extends this line of research to polytomous classification tests for two- and three-group scenarios (e.g., inferior, mediocre, and superior proficiencies). Results of two simulation experiments with generated and real responses (N = 2,000) to established personality scales of different length (12, 20, or 29 items) demonstrate that adaptive item presentations significantly reduce the number of items required to make such classification decisions while maintaining consistent classification accuracy. Furthermore, the simulations highlight the importance of the selected test termination criterion, which has a significant impact on the average test length.

}, doi = {10.1177/0013164410393956}, url = {http://epm.sagepub.com/content/71/6/1006.abstract}, author = {Gnambs, Timo and Batinic, Bernad} } @conference {2081, title = {Practitioner{\textquoteright}s Approach to Identify Item Drift in CAT}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, keywords = {CUSUM method, G2 statistic, IPA, item drift, item parameter drift, Lord{\textquoteright}s chi-square statistic, Raju{\textquoteright}s NCDIF}, author = {Huijuan Meng and Susan Steinkamp and Paul Jones and Joy Matthews-Lopez} } @article {2204, title = {Restrictive Stochastic Item Selection Methods in Cognitive Diagnostic Computerized Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {48}, number = {3}, year = {2011}, pages = {255{\textendash}273}, abstract = {

This paper proposes two new item selection methods for cognitive diagnostic computerized adaptive testing: the restrictive progressive method and the restrictive threshold method. They are built upon the posterior weighted Kullback-Leibler (KL) information index but include additional stochastic components either in the item selection index or in the item selection procedure. Simulation studies show that both methods are successful at simultaneously suppressing overexposed items and increasing the usage of underexposed items. Compared to item selection based upon (1) pure KL information and (2) the Sympson-Hetter method, the two new methods strike a better balance between item exposure control and measurement accuracy. The two new methods are also compared with Barrada et al.'s (2008) progressive method and proportional method.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2011.00145.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2011.00145.x}, author = {Wang, Chun and Chang, Hua-Hua and Huebner, Alan} } @conference {2076, title = {Small-Sample Shadow Testing}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, keywords = {CAT, shadow test}, author = {Wallace Judd} } @conference {2105, title = {A Test Assembly Model for MST}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

This study is a short exploration of the optimization of an MST. It is extremely hard, or perhaps impossible, to chart the influence of the item pool and test specifications on the optimization process. Simulations are very helpful in finding an acceptable MST.

}, keywords = {CAT, mst, multistage testing, Rasch, routing, tif}, author = {Angela Verschoor and Ingrid Radtke and Theo Eggen} } @article {2045, title = {Unproctored Internet test verification: Using adaptive confirmation testing}, journal = {Organizational Research Methods}, volume = {14}, year = {2011}, pages = {608-630}, author = {Makransky, G. and Glas, C. A. W.} } @conference {2107, title = {The Use of Decision Trees for Adaptive Item Selection and Score Estimation}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

Conducted post-hoc simulations comparing the relative efficiency and precision of decision trees (using CHAID and CART) vs. IRT-based CAT.

Conclusions

Decision tree methods were more efficient than CAT

But,...

Conclusions

CAT selects items based on two criteria: item location relative to the current estimate of theta, and item discrimination.

Decision Trees select items that best discriminate between groups defined by the total score.

CAT is optimal only when trait level is well estimated.
Findings suggest that using a decision tree followed by CAT item selection may be advantageous.

}, keywords = {adaptive item selection, CAT, decision tree}, author = {Barth B. Riley and Rodney Funk and Michael L. Dennis and Richard D. Lennox and Matthew Finkelman} } @article {2243, title = {Using Item Response Theory and Adaptive Testing in Online Career Assessment}, journal = {Journal of Career Assessment}, volume = {19}, number = {3}, year = {2011}, pages = {274-286}, abstract = {

The present article describes the potential utility of item response theory (IRT) and adaptive testing for scale evaluation and for web-based career assessment. The article describes the principles of both IRT and adaptive testing and then illustrates these with reference to data analyses and simulation studies of the Career Confidence Inventory (CCI). The kinds of information provided by IRT are shown to give a more precise look at scale quality across the trait continuum and also to permit the use of adaptive testing, where the items administered are tailored to the individual being tested. Such tailoring can significantly reduce testing time while maintaining high quality of measurement. This efficiency is especially useful when multiscale inventories and/or a large number of scales are to be administered. Readers are encouraged to consider using these advances in career assessment.

}, doi = {10.1177/1069072710395534}, url = {http://jca.sagepub.com/cgi/content/abstract/19/3/274}, author = {Betz, Nancy E. and Turner, Brandon M.} } @conference {2101, title = {Walking the Tightrope: Using Better Content Control to Improve CAT}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, abstract = {

All testing involves a balance between measurement precision and content considerations. CAT item-selection algorithms have evolved to accommodate content considerations. Reviews CAT evolution, including: original ("pure") adaptive exams, constrained CAT, the weighted-deviations method, the shadow-test approach, testlets instead of fully adapted tests, cases in which administration of one item may preclude the administration of other item(s), and item relationships.

Research Questions

}, keywords = {CAT, CAT evolution, test content}, author = {Kathleen A. Gialluca} } @inbook {2069, title = {Adaptive Mastery Testing Using a Multidimensional IRT Model}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {409-431}, chapter = {21}, doi = {10.1007/978-0-387-85461-8}, author = {Glas, C. A. W. and Vos, H. J.} } @inbook {2054, title = {Adaptive Tests for Measuring Anxiety and Depression}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {123-136}, chapter = {6}, doi = {10.1007/978-0-387-85461-8}, author = {Walter, O. B.} } @inbook {2061, title = {Assembling an Inventory of Multistage Adaptive Testing Systems}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {247-266}, chapter = {13}, doi = {10.1007/978-0-387-85461-8}, author = {Breithaupt, K and Ariel, A. and Hare, D. R.} } @article {2046, title = {An automatic online calibration design in adaptive testing}, journal = {Journal of Applied Testing Technology}, volume = {11}, year = {2010}, author = {Makransky, G. and Glas, C. A. W.} } @article {415, title = {Bayesian item selection in constrained adaptive testing}, journal = {Psicologica}, volume = {31}, number = {1}, year = {2010}, pages = {149-169}, abstract = {Application of Bayesian item selection criteria in computerized adaptive testing might result in improvement of bias and MSE of the ability estimates. The question remains how to apply Bayesian item selection criteria in the context of constrained adaptive testing, where large numbers of specifications have to be taken into account in the item selection process. The Shadow Test Approach is a general purpose algorithm for administering constrained CAT. In this paper it is shown how the approach can be slightly modified to handle Bayesian item selection criteria. No differences in performance were found between the shadow test approach and the modifiedapproach. In a simulation study of the LSAT, the effects of Bayesian item selection criteria are illustrated. The results are compared to item selection based on Fisher Information. General recommendations about the use of Bayesian item selection criteria are provided.}, keywords = {computerized adaptive testing}, author = {Veldkamp, B. P.} } @article {2160, title = {A Comparison of Content-Balancing Procedures for Estimating Multiple Clinical Domains in Computerized Adaptive Testing: Relative Precision, Validity, and Detection of Persons With Misfitting Responses}, journal = {Applied Psychological Measurement}, volume = {34}, number = {6}, year = {2010}, pages = {410-423}, abstract = {

This simulation study sought to compare four different computerized adaptive testing (CAT) content-balancing procedures designed for use in a multidimensional assessment with respect to measurement precision, symptom severity classification, validity of clinical diagnostic recommendations, and sensitivity to atypical responding. The four content-balancing procedures were (a) no content balancing, (b) screener-based, (c) mixed (screener plus content balancing), and (d) full content balancing. In full content balancing and in mixed content balancing following administration of the screener items, item selection was based on (a) whether the target number of items for the item's subscale was reached and (b) the item's information function. Mixed and full content balancing provided the best representation of items from each of the main subscales of the Internal Mental Distress Scale. These procedures also resulted in higher CAT to full-scale correlations for the Trauma and Homicidal/Suicidal Thought subscales and improved detection of atypical responding.

}, doi = {10.1177/0146621609349802}, url = {http://apm.sagepub.com/content/34/6/410.abstract}, author = {Barth B. Riley and Michael L. Dennis and Conrad, Kendon J.} } @article {334, title = {A comparison of content-balancing procedures for estimating multiple clinical domains in computerized adaptive testing: Relative precision, validity, and detection of persons with misfitting responses}, journal = {Applied Psychological Measurement}, volume = {34}, number = {6}, year = {2010}, pages = {410-423}, abstract = {This simulation study sought to compare four different computerized adaptive testing (CAT) content-balancing procedures designed for use in a multidimensional assessment with respect to measurement precision, symptom severity classification, validity of clinical diagnostic recommendations, and sensitivity to atypical responding. The four content-balancing procedures were (a) no content balancing, (b) screener-based, (c) mixed (screener plus content balancing), and (d) full content balancing. In full content balancing and in mixed content balancing following administration of the screener items, item selection was based on (a) whether the target numberof items for the item{\textquoteright}s subscale was reached and (b) the item{\textquoteright}s information function. Mixed and full content balancing provided the best representation of items from each of the main subscales of the Internal Mental Distress Scale. These procedures also resulted in higher CAT to full-scale correlations for the Trauma and Homicidal/Suicidal Thought subscales and improved detection of atypical responding.Keywords}, isbn = {0146-62161552-3497}, author = {Riley, B. B. and Dennis, M. L. and Conrad, K. J.} } @article {2161, title = {A Comparison of Item Selection Techniques for Testlets}, journal = {Applied Psychological Measurement}, volume = {34}, number = {6}, year = {2010}, pages = {424-437}, abstract = {

This study examined the performance of the maximum Fisher's information, the maximum posterior weighted information, and the minimum expected posterior variance methods for selecting items in a computerized adaptive testing system when the items were grouped in testlets. A simulation study compared the efficiency of ability estimation among the item selection techniques under varying conditions of local-item dependency when the response model was either the three-parameter-logistic item response theory or the three-parameter-logistic testlet response theory. The item selection techniques performed similarly within any particular condition, the practical implications of which are discussed within the article.

}, doi = {10.1177/0146621609349804}, url = {http://apm.sagepub.com/content/34/6/424.abstract}, author = {Murphy, Daniel L. and Dodd, Barbara G. and Vaughn, Brandon K.} } @conference {393, title = {Computerized adaptive testing based on decision trees}, booktitle = {10th IEEE International Conference on Advanced Learning Technologies}, volume = {58}, year = {2010}, pages = {191-193}, publisher = {IEEE Computer Sience}, organization = {IEEE Computer Sience}, address = {Sousse, Tunisia}, author = {Ueno, M. and Songmuang, P.} } @inbook {2050, title = {Constrained Adaptive Testing with Shadow Tests}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {31-56}, chapter = {2}, doi = {10.1007/978-0-387-85461-8}, author = {van der Linden, W. J.} } @inbook {2057, title = {Designing and Implementing a Multistage Adaptive Test: The Uniform CPA Exam}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {167-190}, chapter = {9}, doi = {10.1007/978-0-387-85461-8}, author = {Melican, G.J. and Breithaupt, K and Zhang, Y.} } @inbook {2060, title = {Designing Item Pools for Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {231-245}, chapter = {12}, doi = {10.1007/978-0-387-85461-8}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @article {326, title = {Designing item pools to optimize the functioning of a computerized adaptive test}, journal = {Psychological Test and Assessment Modeling}, volume = {52}, number = {2}, year = {2010}, pages = {127-141}, abstract = {Computerized adaptive testing (CAT) is a testing procedure that can result in improved precision for a specified test length or reduced test length with no loss of precision. However, these attractive psychometric features of CATs are only achieved if appropriate test items are available for administration. This set of test items is commonly called an {\textquotedblleft}item pool.{\textquotedblright} This paper discusses the optimal characteristics for an item pool that will lead to the desired properties for a CAT. Then, a procedure is described for designing the statistical characteristics of the item parameters for an optimal item pool within an item response theory framework. Because true optimality is impractical, methods for achieving practical approximations to optimality are described. The results of this approach are shown for an operational testing program including comparisons to the results from the item pool currently used in that testing program.Key}, isbn = {2190-0507}, author = {Reckase, M. D.} } @inbook {2064, title = {Detecting Person Misfit in Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {315-329}, chapter = {16}, doi = {10.1007/978-0-387-85461-8}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {113, title = {Detection of aberrant item score patterns in computerized adaptive testing: An empirical example using the CUSUM}, journal = {Personality and Individual Differences}, volume = {48}, number = {8}, year = {2010}, pages = {921-925}, abstract = {The scalability of individual trait scores on a computerized adaptive test (CAT) was assessed through investigating the consistency of individual item score patterns. A sample of N = 428 persons completed a personality CAT as part of a career development procedure. To detect inconsistent item score patterns, we used a cumulative sum (CUSUM) procedure. 
Combined information from the CUSUM, other personality measures, and interviews showed that similar estimated trait values may have a different interpretation. Implications for computer-based assessment are discussed.}, keywords = {CAT, computerized adaptive testing, CUSUM approach, person Fit}, isbn = {01918869}, author = {Egberink, I. J. L. and Meijer, R. R. and Veldkamp, B. P. and Schakel, L. and Smid, N. G.} } @article {2, title = {Deterioro de par{\'a}metros de los {\'\i}tems en tests adaptativos informatizados: estudio con eCAT [Item parameter drift in computerized adaptive testing: Study with eCAT]}, journal = {Psicothema}, volume = {22}, number = {2}, year = {2010}, note = {Abad, Francisco JOlea, JulioAguado, DavidPonsoda, VicenteBarrada, Juan REnglish AbstractSpainPsicothemaPsicothema. 2010 May;22(2):340-7.}, pages = {340-7}, edition = {2010/04/29}, abstract = {

En el presente trabajo se muestra el análisis realizado sobre un Test Adaptativo Informatizado (TAI) diseñado para la evaluación del nivel de inglés, denominado eCAT, con el objetivo de estudiar el deterioro de parámetros (parameter drift) producido desde la calibración inicial del banco de ítems. Se ha comparado la calibración original desarrollada para la puesta en servicio del TAI (N= 3224) y la calibración actual obtenida con las aplicaciones reales del TAI (N= 7254). Se ha analizado el Funcionamiento Diferencial de los Ítems (FDI) en función de los parámetros utilizados y se ha simulado el impacto que sobre el nivel de rasgo estimado tiene la variación en los parámetros. Los resultados muestran que se produce especialmente un deterioro de los parámetros a y c, que hay un importante número de ítems del banco para los que existe FDI y que la variación de los parámetros produce un impacto moderado en la estimación de θ de los evaluados con nivel de inglés alto. Se concluye que los parámetros de los ítems se han deteriorado y deben ser actualizados. Item parameter drift in computerized adaptive testing: Study with eCAT. This study describes the parameter drift analysis conducted on eCAT (a Computerized Adaptive Test to assess the written English level of Spanish speakers). The original calibration of the item bank (N = 3224) was compared to a new calibration obtained from the data provided by most eCAT operative administrations (N = 7254). A Differential Item Functioning (DIF) study was conducted between the original and the new calibrations. The impact that the new parameters have on the trait level estimates was obtained by simulation. Results show that parameter drift is found especially for the a and c parameters, an important number of bank items show DIF, and the parameter change has a moderate impact on high-level-English θ estimates. It is then recommended to replace the original estimates by the new set.

}, keywords = {*Software, Educational Measurement/*methods/*statistics \& numerical data, Humans, Language}, isbn = {0214-9915 (Print)0214-9915 (Linking)}, author = {Abad, F. J. and Olea, J. and Aguado, D. and Ponsoda, V. and Barrada, J} } @article {819, title = {Development and evaluation of a confidence-weighting computerized adaptive testing}, journal = {Educational Technology \& Society}, volume = {13(3)}, year = {2010}, pages = {163{\textendash}176}, author = {Yen, Y. C. and Ho, R. G. and Chen, L. J. and Chou, K. Y. and Chen, Y. L.} } @article {46, title = {Development and validation of patient-reported outcome measures for sleep disturbance and sleep-related impairments}, journal = {Sleep}, volume = {33}, number = {6}, year = {2010}, note = {Buysse, Daniel JYu, LanMoul, Douglas EGermain, AnneStover, AngelaDodds, Nathan EJohnston, Kelly LShablesky-Cade, Melissa APilkonis, Paul AAR052155/AR/NIAMS NIH HHS/United StatesU01AR52155/AR/NIAMS NIH HHS/United StatesU01AR52158/AR/NIAMS NIH HHS/United StatesU01AR52170/AR/NIAMS NIH HHS/United StatesU01AR52171/AR/NIAMS NIH HHS/United StatesU01AR52177/AR/NIAMS NIH HHS/United StatesU01AR52181/AR/NIAMS NIH HHS/United StatesU01AR52186/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralValidation StudiesUnited StatesSleepSleep. 2010 Jun 1;33(6):781-92.}, month = {Jun 1}, pages = {781-92}, edition = {2010/06/17}, abstract = {STUDY OBJECTIVES: To develop an archive of self-report questions assessing sleep disturbance and sleep-related impairments (SRI), to develop item banks from this archive, and to validate and calibrate the item banks using classic validation techniques and item response theory analyses in a sample of clinical and community participants. DESIGN: Cross-sectional self-report study. SETTING: Academic medical center and participant homes. PARTICIPANTS: One thousand nine hundred ninety-three adults recruited from an Internet polling sample and 259 adults recruited from medical, psychiatric, and sleep clinics. INTERVENTIONS: None. MEASUREMENTS AND RESULTS: This study was part of PROMIS (Patient-Reported Outcomes Information System), a National Institutes of Health Roadmap initiative. Self-report item banks were developed through an iterative process of literature searches, collecting and sorting items, expert content review, qualitative patient research, and pilot testing. Internal consistency, convergent validity, and exploratory and confirmatory factor analysis were examined in the resulting item banks. Factor analyses identified 2 preliminary item banks, sleep disturbance and SRI. Item response theory analyses and expert content review narrowed the item banks to 27 and 16 items, respectively. Validity of the item banks was supported by moderate to high correlations with existing scales and by significant differences in sleep disturbance and SRI scores between participants with and without sleep disorders. CONCLUSIONS: The PROMIS sleep disturbance and SRI item banks have excellent measurement properties and may prove to be useful for assessing general aspects of sleep and SRI with various groups of patients and interventions.}, keywords = {*Outcome Assessment (Health Care), *Self Disclosure, Adult, Aged, Aged, 80 and over, Cross-Sectional Studies, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Psychometrics, Questionnaires, Reproducibility of Results, Sleep Disorders/*diagnosis, Young Adult}, isbn = {0161-8105 (Print)0161-8105 (Linking)}, author = {Buysse, D. J. and Yu, L. and Moul, D. E. and Germain, A. 
and Stover, A. and Dodds, N. E. and Johnston, K. L. and Shablesky-Cade, M. A. and Pilkonis, P. A.} } @article {312, title = {Development of computerized adaptive testing (CAT) for the EORTC QLQ-C30 physical functioning dimension}, journal = {Quality of Life Research }, volume = {20}, number = {4}, year = {2010}, note = {Qual Life Res. 2010 Oct 23.}, pages = {479-490}, edition = {2010/10/26}, abstract = {PURPOSE: Computerized adaptive test (CAT) methods, based on item response theory (IRT), enable a patient-reported outcome instrument to be adapted to the individual patient while maintaining direct comparability of scores. The EORTC Quality of Life Group is developing a CAT version of the widely used EORTC QLQ-C30. We present the development and psychometric validation of the item pool for the first of the scales, physical functioning (PF). METHODS: Initial developments (including literature search and patient and expert evaluations) resulted in 56 candidate items. Responses to these items were collected from 1,176 patients with cancer from Denmark, France, Germany, Italy, Taiwan, and the United Kingdom. The items were evaluated with regard to psychometric properties. RESULTS: Evaluations showed that 31 of the items could be included in a unidimensional IRT model with acceptable fit and good content coverage, although the pool may lack items at the upper extreme (good PF). There were several findings of significant differential item functioning (DIF). However, the DIF findings appeared to have little impact on the PF estimation. CONCLUSIONS: We have established an item pool for CAT measurement of PF and believe that this CAT instrument will clearly improve the EORTC measurement of PF.}, isbn = {1573-2649 (Electronic)0962-9343 (Linking)}, author = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Chie, W. C. and Conroy, T. and Costantini, A. and Fayers, P. and Helbostad, J. and Holzner, B. and Kaasa, S. and Singer, S. and Velikova, G. and Young, T.} } @article {510, title = {Efficiency of static and computer adaptive short forms compared to full-length measures of depressive symptoms}, journal = {Quality of Life Research}, volume = {19(1)}, year = {2010}, pages = {125{\textendash}136}, author = {Choi, S. and Reise, S. P. and Pilkonis, P. A. and Hays, R. D. and Cella, D.} } @book {2048, title = {Elements of Adaptive Testing}, year = {2010}, pages = {437}, publisher = {Springer}, organization = {Springer}, address = {New York}, doi = {10.1007/978-0-387-85461-8}, author = {van der Linden, W. J. and Glas, C. A. W.} } @inbook {2063, title = {Estimation of the Parameters in an Item-Cloning Model for Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {289-314}, chapter = {15}, doi = {10.1007/978-0-387-85461-8 }, author = {Glas, C. A. W. and van der Linden, W. J. and Geerlings, H.} } @article {32, title = {Features of the sampling distribution of the ability estimate in computerized adaptive testing according to two stopping rules}, journal = {Journal of Applied Measurement}, volume = {11}, number = {4}, year = {2010}, note = {Blais, Jean-GuyRaiche, GillesUnited StatesJournal of applied measurementJ Appl Meas. 2010;11(4):424-31.}, pages = {424-31}, edition = {2010/12/18}, abstract = {Whether paper and pencil or computerized adaptive, tests are usually described by a set of rules managing how they are administered: which item will be first, which should follow any given item, when to administer the last one. 
This article focuses on the latter and looks at the effect of two stopping rules on the estimated sampling distribution of the ability estimate in a CAT: the number of items administered and the a priori determined size of the standard error of the ability estimate.}, isbn = {1529-7713 (Print)1529-7713 (Linking)}, author = {Blais, J. G. and Raiche, G.} } @inbook {2056, title = {Implementing the Graduate Management Admission Test Computerized Adaptive Test}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {151-166}, chapter = {8}, doi = {10.1007/978-0-387-85461-8}, author = {Rudner, L. M.} } @article {2187, title = {Improving Cognitive Diagnostic Computerized Adaptive Testing by Balancing Attribute Coverage: The Modified Maximum Global Discrimination Index Method}, journal = {Educational and Psychological Measurement}, volume = {70}, number = {6}, year = {2010}, pages = {902-913}, abstract = {

This article proposes a new item selection method, namely, the modified maximum global discrimination index (MMGDI) method, for cognitive diagnostic computerized adaptive testing (CD-CAT). The new method captures two aspects of the appeal of an item: (a) the amount of contribution it can make toward adequate coverage of every attribute and (b) the amount of contribution it can make toward recovering the latent cognitive profile. A simulation study shows that the new method ensures adequate coverage of every attribute, which improves the validity of the test scores and the defensibility of the proposed uses of the test. Furthermore, compared with the original global discrimination index method, the MMGDI method improves the recovery rate of each attribute and of the entire cognitive profile, especially the latter. Therefore, the new method improves both the validity and reliability of the test scores from a CD-CAT program.

}, doi = {10.1177/0013164410366693}, url = {http://epm.sagepub.com/content/70/6/902.abstract}, author = {Ying Cheng,} } @inbook {2059, title = {Innovative Items for Computerized Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {215-230}, chapter = {11}, doi = {10.1007/978-0-387-85461-8}, author = {Parshall, C. G. and Harmes, J. C. and Davey, T. and Pashley, P. J.} } @inbook {2065, title = {The Investigation of Differential Item Functioning in Adaptive Tests}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {331-352}, chapter = {17}, doi = {10.1007/978-0-387-85461-8}, author = {Zwick, R.} } @inbook {2062, title = {Item Parameter Estimation and Item Fit Analysis}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {269-288}, chapter = {14}, doi = {10.1007/978-0-387-85461-8}, author = {Glas, C. A. W.} } @inbook {2049, title = {Item Selection and Ability Estimation in Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {3-30}, publisher = {Springer}, organization = {Springer}, chapter = {1}, address = {New York}, doi = {10.1007/978-0-387-85461-8}, author = {van der Linden, W. J. and Pashley, P. J.} } @article {2071, title = {Item Selection and Hypothesis Testing for the Adaptive Measurement of Change}, journal = {Applied Psychological Measurement}, volume = {34}, year = {2010}, pages = {238-254}, abstract = {

Assessing individual change is an important topic in both psychological and educational measurement. An adaptive measurement of change (AMC) method had previously been shown to exhibit greater efficiency in detecting change than conventional nonadaptive methods. However, little work had been done to compare different procedures within the AMC framework. This study introduced a new item selection criterion and two new test statistics for detecting change with AMC that were specifically designed for the paradigm of hypothesis testing. In two simulation sets, the new methods for detecting significant change improved on existing procedures by demonstrating better adherence to Type I error rates and substantially better power for detecting relatively small change.

}, keywords = {change, computerized adaptive testing, individual change, Kullback{\textendash}Leibler information, likelihood ratio, measuring change}, doi = {10.1177/0146621609344844}, author = {Finkelman, M. D. and Weiss, D. J. and Kim-Kang, G.} } @inbook {2058, title = {A Japanese Adaptive Test of English as a Foreign Language: Developmental and Operational Aspects}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {191-211}, chapter = {10}, doi = {10.1007/978-0-387-85461-8}, author = {Nogami, Y. and Hayashi, N.} } @article {2125, title = {Manual for CATSim: Comprehensive simulation of computerized adaptive testing}, year = {2010}, publisher = {Assessment Systems Corporation}, address = {St. Paul, MN}, author = {Weiss, D. J. and Guyer, R. D.} } @article {157, title = {Marginal likelihood inference for a model for item responses and response times}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {63}, number = {3}, year = {2010}, note = {Glas, Cees A Wvan der Linden, Wim JResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2010 Nov;63(Pt 3):603-26. Epub 2010 Jan 28.}, pages = {603-26}, edition = {2010/01/30}, abstract = {

Marginal maximum-likelihood procedures for parameter estimation and testing the fit of a hierarchical model for speed and accuracy on test items are presented. The model is a composition of two first-level models for dichotomous responses and response times along with multivariate normal models for their item and person parameters. It is shown how the item parameters can easily be estimated using Fisher's identity. To test the fit of the model, Lagrange multiplier tests of the assumptions of subpopulation invariance of the item parameters (i.e., no differential item functioning), the shape of the response functions, and three different types of conditional independence were derived. Simulation studies were used to show the feasibility of the estimation and testing procedures and to estimate the power and Type I error rate of the latter. In addition, the procedures were applied to an empirical data set from a computerized adaptive test of language comprehension.

}, isbn = {0007-1102 (Print)0007-1102 (Linking)}, author = {Glas, C. A. W. and van der Linden, W. J.} } @inbook {2055, title = {MATHCAT: A Flexible Testing System in Mathematics Education for Adults}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {137-150}, chapter = {7}, doi = {10.1007/978-0-387-85461-8}, author = {Verschoor, Angela J. and Straetmans, G. J. J. M.} } @article {2162, title = {A Method for the Comparison of Item Selection Rules in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {34}, number = {6}, year = {2010}, pages = {438-452}, abstract = {

In a typical study comparing the relative efficiency of two item selection rules in computerized adaptive testing, the common result is that they simultaneously differ in accuracy and security, making it difficult to reach a conclusion on which is the more appropriate rule. This study proposes a strategy to conduct a global comparison of two or more selection rules. A plot showing the performance of each selection rule for several maximum exposure rates is obtained and the whole plot is compared with other rule plots. The strategy was applied in a simulation study with fixed-length CATs for the comparison of six item selection rules: the point Fisher information, Fisher information weighted by likelihood, Kullback-Leibler weighted by likelihood, maximum information stratification with blocking, progressive and proportional methods. Our results show that there is no optimal rule for any overlap value or root mean square error (RMSE). The fact that a rule, for a given level of overlap, has lower RMSE than another does not imply that this pattern holds for another overlap rate. A fair comparison of the rules requires extensive manipulation of the maximum exposure rates. The best methods were the Kullback-Leibler weighted by likelihood, the proportional method, and the maximum information stratification method with blocking.

}, doi = {10.1177/0146621610370152}, url = {http://apm.sagepub.com/content/34/6/438.abstract}, author = {Barrada, Juan Ram{\'o}n and Olea, Julio and Ponsoda, Vicente and Abad, Francisco Jos{\'e}} } @article {2235, title = {A Monte Carlo Simulation Investigating the Validity and Reliability of Ability Estimation in Item Response Theory with Speeded Computer Adaptive Tests}, journal = {International Journal of Testing}, volume = {10}, number = {3}, year = {2010}, pages = {230-261}, doi = {10.1080/15305058.2010.488098}, url = {http://www.tandfonline.com/doi/abs/10.1080/15305058.2010.488098}, author = {Schmitt, T. A. and Sass, D. A. and Sullivan, J. R. and Walker, C. M.} } @inbook {2052, title = {Multidimensional Adaptive Testing with Kullback{\textendash}Leibler Information Item Selection}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {77-102}, chapter = {4}, doi = {10.1007/978-0-387-85461-8}, author = {Mulder, J. and van der Linden, W. J.} } @article {2268, title = {Multidimensionale adaptive Kompetenzdiagnostik: Ergebnisse zur Messeffizienz [Multidimensional adaptive testing of competencies: Results regarding measurement efficiency].}, journal = {Zeitschrift f{\"u}r P{\"a}dagogik}, volume = {56}, year = {2010}, pages = {40-51}, author = {Frey, A. and Seitz, N-N.} } @inbook {2066, title = {Multistage Testing: Issues, Designs, and Research}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {355-372}, chapter = {18}, doi = {10.1007/978-0-387-85461-8}, author = {Zenisky, A. L. and Hambleton, R. K. and Luecht, RM} } @article {80, title = {A new stopping rule for computerized adaptive testing}, journal = {Educational and Psychological Measurement}, volume = {70}, number = {6}, year = {2010}, note = {U01 AR052177-04/NIAMS NIH HHS/Educ Psychol Meas. 2010 Dec 1;70(6):1-17.}, month = {Dec 1}, pages = {1-17}, edition = {2011/02/01}, abstract = {The goal of the current study was to introduce a new stopping rule for computerized adaptive testing. The predicted standard error reduction stopping rule (PSER) uses the predictive posterior variance to determine the reduction in standard error that would result from the administration of additional items. The performance of the PSER was compared to that of the minimum standard error stopping rule and a modified version of the minimum information stopping rule in a series of simulated adaptive tests, drawn from a number of item pools. Results indicate that the PSER makes efficient use of CAT item pools, administering fewer items when predictive gains in information are small and increasing measurement precision when information is abundant.}, isbn = {0013-1644 (Print)0013-1644 (Linking)}, author = {Choi, S. W. and Grady, M. W. and Dodd, B. G.} } @article {64, title = {Online calibration via variable length computerized adaptive testing}, journal = {Psychometrika}, volume = {75}, number = {1}, year = {2010}, pages = {140-157}, abstract = {Item calibration is an essential issue in modern item response theory based psychological or educational testing. Due to the popularity of computerized adaptive testing, methods to efficiently calibrate new items have become more important than that in the time when paper and pencil test administration is the norm. There are many calibration processes being proposed and discussed from both theoretical and practical perspectives. Among them, the online calibration may be one of the most cost effective processes. 
In this paper, under a variable length computerized adaptive testing scenario, we integrate the methods of adaptive design, sequential estimation, and measurement error models to solve online item calibration problems. The proposed sequential estimate of item parameters is shown to be strongly consistent and asymptotically normally distributed with a prechosen accuracy. Numerical results show that the proposed method is very promising in terms of both estimation accuracy and efficiency. The results of using calibrated items to estimate the latent trait levels are also reported.}, isbn = {0033-3123}, author = {Chang, Y. I. and Lu, H. Y.} } @inbook {2051, title = {Principles of Multidimensional Adaptive Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {57-76}, chapter = {3}, doi = {10.1007/978-0-387-85461-8}, author = {Segall, D. O.} } @article {2159, title = {A Procedure for Controlling General Test Overlap in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {34}, number = {6}, year = {2010}, pages = {393-409}, abstract = {

To date, exposure control procedures that are designed to control test overlap in computerized adaptive tests (CATs) are based on the assumption of item sharing between pairs of examinees. However, in practice, examinees may obtain test information from more than one previous test taker. This larger scope of information sharing needs to be considered in conducting test overlap control. The purpose of this study is to propose a test overlap control method such that the proportion of overlapping items encountered by an examinee with a group of previous examinees (described as general test overlap rate) can be controlled. Results indicated that item exposure rate and general test overlap rate could be simultaneously controlled by implementing the procedure. In addition, these two indices were controlled on the fly without any iterative simulations conducted prior to operational CATs. Thus, the proposed procedure would be an efficient method for controlling both the item exposure and general test overlap in CATs.

}, doi = {10.1177/0146621610367788}, url = {http://apm.sagepub.com/content/34/6/393.abstract}, author = {Chen, Shu-Ying} } @inbook {2053, title = {Sequencing an Adaptive Test Battery}, booktitle = {Elements of Adaptive Testing}, year = {2010}, chapter = {5}, doi = {10.1007/978-0-387-85461-8}, author = {van der Linden, W. J.} } @article {2123, title = {SimulCAT: Windows application that simulates computerized adaptive test administration}, year = {2010}, url = {http://www.hantest.net/simulcat}, author = {Han, K. T.} } @article {2203, title = {Stratified and Maximum Information Item Selection Procedures in Computer Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {47}, number = {2}, year = {2010}, pages = {202{\textendash}226}, abstract = {

In this study we evaluated and compared three item selection procedures: the maximum Fisher information procedure (F), the a-stratified multistage computer adaptive testing (CAT) (STR), and a refined stratification procedure that allows more items to be selected from the high a strata and fewer items from the low a strata (USTR), along with completely random item selection (RAN). The comparisons were with respect to error variances, reliability of ability estimates and item usage through CATs simulated under nine test conditions of various practical constraints and item selection space. The results showed that F had an apparent precision advantage over STR and USTR under unconstrained item selection, but with very poor item usage. USTR reduced error variances for STR under various conditions, with small compromises in item usage. Compared to F, USTR enhanced item usage while achieving comparable precision in ability estimates; it achieved a precision level similar to F with improved item usage when items were selected under exposure control and with limited item selection space. The results provide implications for choosing an appropriate item selection procedure in applied settings.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2010.00109.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2010.00109.x}, author = {Deng, Hui and Ansley, Timothy and Chang, Hua-Hua} } @article {2117, title = {Stratified and maximum information item selection procedures in computer adaptive testing}, journal = {Journal of Educational Measurement}, volume = {47}, year = {2010}, pages = {202-226}, author = {Deng, H. and Ansley, T. and Chang, H.-H.} } @inbook {2068, title = {Testlet-Based Adaptive Mastery Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {387-409}, chapter = {20}, doi = {10.1007/978-0-387-85461-8}, author = {Vos, H. J. and Glas, C. A. W.} } @article {298, title = {Tests informatizados y otros nuevos tipos de tests [Computerized and other new types of tests]}, journal = {Papeles del Psic{\'o}logo}, volume = {31}, number = {1}, year = {2010}, pages = {94-107}, abstract = {Recientemente se ha producido un considerable desarrollo de los tests adaptativos informatizados, en los que el test se adapta progresivamente al rendimiento del evaluando, y de otros tipos de tests: a) los test basados en modelos (se dispone de un modelo o teor{\'\i}a de c{\'o}mo se responde a cada {\'\i}tem, lo que permite predecir su dificultad), b) los tests ipsativos (el evaluado ha de elegir entre opciones que tienen parecida deseabilidad social, por lo que pueden resultar eficaces para controlar algunos sesgos de respuestas), c) los tests conductuales (miden rasgos que ordinariamente se han venido midiendo con autoinformes, mediante tareas que requieren respuestas no verbales) y d) los tests situacionales (en los que se presenta al evaluado una situaci{\'o}n de conflicto laboral, por ejemplo, con varias posibles soluciones, y ha de elegir la que le parece la mejor descripci{\'o}n de lo que el har{\'\i}a en esa situaci{\'o}n). El art{\'\i}culo comenta las caracter{\'\i}sticas, ventajas e inconvenientes de todos ellos y muestra algunos ejemplos de tests concretos. Palabras clave: Test adaptativo informatizado, Test situacional, Test comportamental, Test ipsativo y generaci{\'o}n autom{\'a}tica de {\'\i}tems.The paper provides a short description of some test types that are earning considerable interest in both research and applied areas. The main feature of a computerized adaptive test is that in despite of the examinees receiving different sets of items, their test scores are in the same metric and can be directly compared. Four other test types are considered: a) model-based tests (a model or theory is available to explain the item response process and this makes the prediction of item difficulties possible), b) ipsative tests (the examinee has to select one among two or more options with similar social desirability; so, these tests can help to control faking or other examinee{\textquoteright}s response biases), c) behavioral tests (personality traits are measured from non-verbal responses rather than from self-reports), and d) situational tests (the examinee faces a conflictive situation and has to select the option that best describes what he or she will do). The paper evaluates these types of tests, comments on their pros and cons and provides some specific examples. Key words: Computerized adaptive test, Situational test, Behavioral test, Ipsative test and y automatic item generation.}, author = {Olea, J. and Abad, F. J. 
and Barrada, J} } @inbook {2067, title = {Three-Category Adaptive Classification Testing}, booktitle = {Elements of Adaptive Testing}, year = {2010}, pages = {373-387}, chapter = {19}, doi = {10.1007/978-0-387-85461-8}, author = {Theo Eggen} } @article {151, title = {The use of PROMIS and assessment center to deliver patient-reported outcome measures in clinical research}, journal = {Journal of Applied Measurement}, volume = {11}, number = {3}, year = {2010}, pages = {304-314}, abstract = {The Patient-Reported Outcomes Measurement Information System (PROMIS) was developed as one of the first projects funded by the NIH Roadmap for Medical Research Initiative to re-engineer the clinical research enterprise. The primary goal of PROMIS is to build item banks and short forms that measure key health outcome domains that are manifested in a variety of chronic diseases which could be used as a "common currency" across research projects. To date, item banks, short forms and computerized adaptive tests (CAT) have been developed for 13 domains with relevance to pediatric and adult subjects. To enable easy delivery of these new instruments, PROMIS built a web-based resource (Assessment Center) for administering CATs and other self-report data, tracking item and instrument development, monitoring accrual, managing data, and storing statistical analysis results. Assessment Center can also be used to deliver custom researcher developed content, and has numerous features that support both simple and complicated accrual designs (branching, multiple arms, multiple time points, etc.). This paper provides an overview of the development of the PROMIS item banks and details Assessment Center functionality.}, isbn = {1529-7713}, author = {Gershon, R. C. and Rothrock, N. and Hanrahan, R. and Bass, M. and Cella, D.} } @booklet {323, title = {Validation of a computer-adaptive test to evaluate generic health-related quality of life}, journal = {Health and Quality of Life Outcomes}, volume = {8}, year = {2010}, note = {Rebollo, PabloCastejon, IgnacioCuervo, JesusVilla, GuillermoGarcia-Cueto, EduardoDiaz-Cuervo, HelenaZardain, Pilar CMuniz, JoseAlonso, JordiSpanish CAT-Health Research GroupEnglandHealth Qual Life Outcomes. 2010 Dec 3;8:147.}, pages = {147}, edition = {2010/12/07}, abstract = {BACKGROUND: Health Related Quality of Life (HRQoL) is a relevant variable in the evaluation of health outcomes. Questionnaires based on Classical Test Theory typically require a large number of items to evaluate HRQoL. Computer Adaptive Testing (CAT) can be used to reduce tests length while maintaining and, in some cases, improving accuracy. This study aimed at validating a CAT based on Item Response Theory (IRT) for evaluation of generic HRQoL: the CAT-Health instrument. METHODS: Cross-sectional study of subjects aged over 18 attending Primary Care Centres for any reason. CAT-Health was administered along with the SF-12 Health Survey. Age, gender and a checklist of chronic conditions were also collected. CAT-Health was evaluated considering: 1) feasibility: completion time and test length; 2) content range coverage, Item Exposure Rate (IER) and test precision; and 3) construct validity: differences in the CAT-Health scores according to clinical variables and correlations between both questionnaires. RESULTS: 396 subjects answered CAT-Health and SF-12, 67.2\% females, mean age (SD) 48.6 (17.7) years. 36.9\% did not report any chronic condition. 
Median completion time for CAT-Health was 81 seconds (IQ range = 59-118) and it increased with age (p < 0.001). The median number of items administered was 8 (IQ range = 6-10). Neither ceiling nor floor effects were found for the score. None of the items in the pool had an IER of 100\% and it was over 5\% for 27.1\% of the items. Test Information Function (TIF) peaked between levels -1 and 0 of HRQoL. Statistically significant differences were observed in the CAT-Health scores according to the number and type of conditions. CONCLUSIONS: Although domain-specific CATs exist for various areas of HRQoL, CAT-Health is one of the first IRT-based CATs designed to evaluate generic HRQoL and it has proven feasible, valid and efficient, when administered to a broad sample of individuals attending primary care settings.}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Rebollo, P. and Castejon, I. and Cuervo, J. and Villa, G. and Garcia-Cueto, E. and Diaz-Cuervo, H. and Zardain, P. C. and Muniz, J. and Alonso, J.} } @article {2158, title = {Variations on Stochastic Curtailment in Sequential Mastery Testing}, journal = {Applied Psychological Measurement}, volume = {34}, number = {1}, year = {2010}, pages = {27-45}, abstract = {

In sequential mastery testing (SMT), assessment via computer is used to classify examinees into one of two mutually exclusive categories. Unlike paper-and-pencil tests, SMT has the capability to use variable-length stopping rules. One approach to shortening variable-length tests is stochastic curtailment, which halts examination if the probability of changing classification decisions is low. The estimation of such a probability is therefore a critical component of a stochastically curtailed test. This article examines several variations on stochastic curtailment where the key probability is estimated more aggressively than the standard formulation, resulting in additional savings in average test length (ATL). In two simulation sets, the variations successfully reduced the ATL, and in many cases the average loss, compared with the standard formulation.

}, doi = {10.1177/0146621609336113}, url = {http://apm.sagepub.com/content/34/1/27.abstract}, author = {Finkelman, Matthew David} } @inbook {1833, title = {Adaptive computer-based tasks under an assessment engineering paradigm}, year = {2009}, note = {{PDF File, 289 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Luecht, RM} } @inbook {1808, title = {Adaptive item calibration: A process for estimating item parameters within a computerized adaptive test}, year = {2009}, note = {{PDF File, 286 KB} {PDF File, 286 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {The characteristics of an adaptive test change the characteristics of the field testing that is necessary to add items to an existing measurement scale. The process used to add field-test items to the adaptive test might lead to scale drift or disrupt the test by administering items of inappropriate difficulty. The current study makes use of the transitivity of examinee and item in item response theory to describe a process for adaptive item calibration. In this process an item is successively administered to examinees whose ability levels match the performance of a given field-test item. By treating the item as if it were taking an adaptive test, examinees can be selected who provide the most information about the item at its momentary difficulty level. This should provide a more efficient procedure for estimating item parameters. The process is described within the context of the one-parameter logistic IRT model. The process is then simulated to identify whether it can be more accurate and efficient than random presentation of field-test items to examinees. Results indicated that adaptive item calibration might provide a viable approach to item calibration within the context of an adaptive test. It might be most useful for expanding item pools in settings with small sample sizes or needs for large numbers of items.}, author = {Kingsbury, G. G.} } @article {206, title = {An adaptive testing system for supporting versatile educational assessment}, journal = {Computers and Education}, volume = {52}, number = {1}, year = {2009}, note = {doi: DOI: 10.1016/j.compedu.2008.06.007}, pages = {53-67}, abstract = {With the rapid growth of computer and mobile technology, it is a challenge to integrate computer based test (CBT) with mobile learning (m-learning) especially for formative assessment and self-assessment. In terms of self-assessment, computer adaptive test (CAT) is a proper way to enable students to evaluate themselves. In CAT, students are assessed through a process that uses item response theory (IRT), a well-founded psychometric theory. Furthermore, a large item bank is indispensable to a test, but when a CAT system has a large item bank, the test item selection of IRT becomes more tedious. Besides the large item bank, item exposure mechanism is also essential to a testing system. However, IRT all lack the above-mentioned points. These reasons have motivated the authors to carry out this study. This paper describes a design issue aimed at the development and implementation of an adaptive testing system. The system can support several assessment functions and different devices. Moreover, the researchers apply a novel approach, particle swarm optimization (PSO) to alleviate the computational complexity and resolve the problem of item exposure. 
Throughout the development of the system, a formative evaluation was embedded into an integral part of the design methodology that was used for improving the system. After the system was formally released onto the web, some questionnaires and experiments were conducted to evaluate the usability, precision, and efficiency of the system. The results of these evaluations indicated that the system provides an adaptive testing for different devices and supports versatile assessment functions. Moreover, the system can estimate students{\textquoteright} ability reliably and validly and conduct an adaptive test efficiently. Furthermore, the computational complexity of the system was alleviated by the PSO approach. By the approach, the test item selection procedure becomes efficient and the average best fitness values are very close to the optimal solutions.}, keywords = {Architectures for educational technology system, Distance education and telelearning}, isbn = {0360-1315}, author = {Huang, Y-M. and Lin, Y-T. and Cheng, S-C.} } @inbook {1805, title = {Adequacy of an item pool measuring proficiency in English language to implement a CAT procedure}, year = {2009}, note = {{PDF File, 160 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Karino, C. A. and Costa, D. R. and Laros, J. A.} } @inbook {1769, title = {Applications of CAT in admissions to higher education in Israel: Twenty-two years of experience}, year = {2009}, note = {{PDF file, 326 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Gafni, N. and Cohen, Y. and Roded, K and Baumer, M and Moshinsky, A.} } @inbook {1864, title = {An approach to implementing adaptive testing using item response theory both offline and online}, year = {2009}, note = {{PDF File, 172 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Padaki, M. and Natarajan, V.} } @inbook {1732, title = {Assessing the equivalence of Internet-based vs. paper-and-pencil psychometric tests.}, year = {2009}, note = {PDF File, 142 K}, address = { D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Baumer, M and Roded, K and Gafni, N.} } @inbook {1836, title = {An automatic online calibration design in adaptive testing}, year = {2009}, note = {{PDF file, 365 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Makransky, G.} } @inbook {1899, title = {A burdened CAT: Incorporating response burden with maximum Fisher{\textquoteright}s information for item selection}, year = {2009}, note = {{PDF File, 374 KB}}, address = {In D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Widely used in various educational and vocational assessment applications, computerized adaptive testing (CAT) has recently begun to be used to measure patient-reported outcomes Although successful in reducing respondent burden, most current CAT algorithms do not formally consider it as part of the item selection process. This study used a loss function approach motivated by decision theory to develop an item selection method that incorporates respondent burden into the item selection process based on maximum Fisher information item selection. 
Several different loss functions placing varying degrees of importance on respondent burden were compared, using an item bank of 62 polytomous items measuring depressive symptoms. One dataset consisted of the real responses from the 730 subjects who responded to all the items. A second dataset consisted of simulated responses to all the items based on a grid of latent trait scores with replicates at each grid point. The algorithm enables a CAT administrator to more efficiently control the respondent burden without severely affecting the measurement precision than when using MFI alone. In particular, the loss function incorporating respondent burden protected respondents from receiving longer tests when their estimated trait score fell in a region where there were few informative items. }, author = {Swartz, R.J.. and Choi, S. W.} } @conference {2035, title = {Comparing methods to recalibrate drifting items in computerized adaptive testing}, booktitle = {American Educational Research Association}, year = {2009}, address = {San Diego, CA}, author = {Masters, J. S. and Muckle, T. J. and Bontempo, B} } @inbook {1755, title = {Comparison of ability estimation and item selection methods in multidimensional computerized adaptive testing}, year = {2009}, note = {{PDF File, 342 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Diao, Q. and Reckase, M.} } @inbook {1743, title = {Comparison of adaptive Bayesian estimation and weighted Bayesian estimation in multidimensional computerized adaptive testing}, year = {2009}, note = {{PDF file, 308KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Chen, P. H.} } @article {2157, title = {Comparison of CAT Item Selection Criteria for Polytomous Items}, journal = {Applied Psychological Measurement}, volume = {33}, number = {6}, year = {2009}, pages = {419-440}, abstract = {

Item selection is a core component in computerized adaptive testing (CAT). Several studies have evaluated new and classical selection methods; however, the few that have applied such methods to the use of polytomous items have reported conflicting results. To clarify these discrepancies and further investigate selection method properties, six different selection methods are compared systematically. The results showed no clear benefit from more sophisticated selection criteria and showed one method previously believed to be superior{\textemdash}the maximum expected posterior weighted information (MEPWI){\textemdash}to be mathematically equivalent to a simpler method, the maximum posterior weighted information (MPWI).

}, doi = {10.1177/0146621608327801}, url = {http://apm.sagepub.com/content/33/6/419.abstract}, author = {Choi, Seung W. and Swartz, Richard J.} } @article {511, title = {Comparison of CAT item selection criteria for polytomous items}, journal = {Applied Psychological Measurement}, volume = {33}, year = {2009}, pages = {419{\textendash}440}, author = {Choi, S. W. and Swartz, R.J..} } @article {15, title = {Comparison of methods for controlling maximum exposure rates in computerized adaptive testing}, journal = {Psicothema}, volume = {21}, number = {2}, year = {2009}, note = {Barrada, Juan RamonAbad, Francisco JoseVeldkamp, Bernard PComparative StudySpainPsicothemaPsicothema. 2009 May;21(2):313-20.}, month = {May}, pages = {313-320}, edition = {2009/05/01}, abstract = {This paper has two objectives: (a) to provide a clear description of three methods for controlling the maximum exposure rate in computerized adaptive testing {\textemdash}the Symson-Hetter method, the restricted method, and the item-eligibility method{\textemdash} showing how all three can be interpreted as methods for constructing the variable sub-bank of items from which each examinee receives the items in his or her test; (b) to indicate the theoretical and empirical limitations of each method and to compare their performance. With the three methods, we obtained basically indistinguishable results in overlap rate and RMSE (differences in the third decimal place). The restricted method is the best method for controlling exposure rate, followed by the item-eligibility method. The worst method is the Sympson-Hetter method. The restricted method presents problems of sequential overlap rate. Our advice is to use the item-eligibility method, as it saves time and satisfies the goals of restricting maximum exposure. Comparaci{\'o}n de m{\'e}todos para el control de tasa m{\'a}xima en tests adaptativos informatizados. Este art{\'\i}culo tiene dos objetivos: (a) ofrecer una descripci{\'o}n clara de tres m{\'e}todos para el control de la tasa m{\'a}xima en tests adaptativos informatizados, el m{\'e}todo Symson-Hetter, el m{\'e}todo restringido y el m{\'e}todode elegibilidad del {\'\i}tem, mostrando c{\'o}mo todos ellos pueden interpretarse como m{\'e}todos para la construcci{\'o}n del subbanco de {\'\i}tems variable, del cual cada examinado recibe los {\'\i}tems de su test; (b) se{\~n}alar las limitaciones te{\'o}ricas y emp{\'\i}ricas de cada m{\'e}todo y comparar sus resultados. Se obtienen resultados b{\'a}sicamente indistinguibles en tasa de solapamiento y RMSE con los tres m{\'e}todos (diferencias en la tercera posici{\'o}n decimal). El m{\'e}todo restringido es el mejor en el control de la tasa de exposici{\'o}n,seguido por el m{\'e}todo de elegibilidad del {\'\i}tem. El peor es el m{\'e}todo Sympson-Hetter. El m{\'e}todo restringido presenta un problema de solapamiento secuencial. Nuestra recomendaci{\'o}n ser{\'\i}a utilizar el m{\'e}todo de elegibilidad del {\'\i}tem, puesto que ahorra tiempo y satisface los objetivos de limitar la tasa m{\'a}xima de exposici{\'o}n.}, keywords = {*Numerical Analysis, Computer-Assisted, Psychological Tests/*standards/*statistics \& numerical data}, isbn = {0214-9915 (Print)0214-9915 (Linking)}, author = {Barrada, J and Abad, F. J. and Veldkamp, B. P.} } @inbook {1749, title = {A comparison of three methods of item selection for computerized adaptive testing}, year = {2009}, note = {PDF file, 531 K}, address = {D. J. 
Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Costa, D. R. and Karino, C. A. and Moura, F. A. S. and Andrade, D. F.} } @inbook {1910, title = {Computerized adaptive testing by mutual information and multiple imputations}, year = {2009}, note = {{PDF file, 179 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Over the years, most computerized adaptive testing (CAT) systems have used score estimation procedures from item response theory (IRT). IRT models have salutary properties for score estimation, error reporting, and next-item selection. However, some testing purposes favor scoring approaches outside IRT. Where a criterion metric is readily available and more relevant than the assessed construct, for example in the selection of job applicants, a predictive model might be appropriate (Scarborough \& Somers, 2006). In these cases, neither IRT scoring nor a unidimensional assessment structure can be assumed. Yet, the primary benefit of CAT remains desirable: shorter assessments with minimal loss of accuracy due to unasked items. In such a case, it remains possible to create a CAT system that produces an estimated score from a subset of available items, recognizes differential item information given the emerging item response pattern, and optimizes the accuracy of the score estimated at every successive item. The method of multiple imputations (Rubin, 1987) can be used to simulate plausible scores given plausible response patterns to unasked items (Thissen-Roe, 2005). Mutual information can then be calculated in order to select an optimally informative next item (or set of items). Previously observed response patterns to two complete neural network-scored assessments were resampled according to MIMI CAT item selection. The reproduced CAT scores were compared to full-length assessment scores. Approximately 95\% accurate assignment of examinees to one of three score categories was achieved with a 70\%-80\% reduction in median test length. Several algorithmic factors influencing accuracy and computational performance were examined.}, author = {Thissen-Roe, A.} } @inbook {1744, title = {Computerized adaptive testing for cognitive diagnosis}, year = {2009}, note = {{PDF File, 308 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Cheng, Y} } @conference {1158, title = {Computerized adaptive testing using the two parameter logistic model with ability-based guessing}, booktitle = {Paper presented at the International Meeting of the Psychometric Society. Cambridge}, year = {2009}, author = {Shih, H.-J. and Wang, W-C.} } @inbook {1959, title = {Computerized classification testing in more than two categories by using stochastic curtailment}, year = {2009}, note = {{PDF file, 298 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Wouda, J. T. and Theo Eggen} } @article {545, title = {A conditional exposure control method for multidimensional adaptive testing}, journal = {Journal of Educational Measurement}, volume = {46}, year = {2009}, pages = {84-103}, author = {Finkelman, M. and Nering, M. L. and Roussos, L. 
A.} } @article {2202, title = {A Conditional Exposure Control Method for Multidimensional Adaptive Testing}, journal = {Journal of Educational Measurement}, volume = {46}, number = {1}, year = {2009}, pages = {84{\textendash}103}, abstract = {

In computerized adaptive testing (CAT), ensuring the security of test items is a crucial practical consideration. A common approach to reducing item theft is to define maximum item exposure rates, i.e., to limit the proportion of examinees to whom a given item can be administered. Numerous methods for controlling exposure rates have been proposed for tests employing the unidimensional 3-PL model. The present article explores the issues associated with controlling exposure rates when a multidimensional item response theory (MIRT) model is utilized and exposure rates must be controlled conditional upon ability. This situation is complicated by the exponentially increasing number of possible ability values in multiple dimensions. The article introduces a new procedure, called the generalized Stocking-Lewis method, that controls the exposure rate for students of comparable ability as well as with respect to the overall population. A realistic simulation set compares the new method with three other approaches: Kullback-Leibler information with no exposure control, Kullback-Leibler information with unconditional Sympson-Hetter exposure control, and random item selection.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2009.01070.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2009.01070.x}, author = {Matthew Finkelman and Nering, Michael L. and Roussos, Louis A.} } @article {320, title = {Considerations about expected a posteriori estimation in adaptive testing: adaptive a priori, adaptive correction for bias, and adaptive integration interval}, journal = {Journal of Applied Measurement}, volume = {10}, number = {2}, year = {2009}, note = {Raiche, GillesBlais, Jean-GuyUnited StatesJournal of applied measurementJ Appl Meas. 2009;10(2):138-56.}, pages = {138-56}, edition = {2009/07/01}, abstract = {In a computerized adaptive test, we would like to obtain an acceptable precision of the proficiency level estimate using an optimal number of items. Unfortunately, decreasing the number of items is accompanied by a certain degree of bias when the true proficiency level differs significantly from the a priori estimate. The authors suggest that it is possible to reduced the bias, and even the standard error of the estimate, by applying to each provisional estimation one or a combination of the following strategies: adaptive correction for bias proposed by Bock and Mislevy (1982), adaptive a priori estimate, and adaptive integration interval.}, keywords = {*Bias (Epidemiology), *Computers, Data Interpretation, Statistical, Models, Statistical}, isbn = {1529-7713 (Print)1529-7713 (Linking)}, author = {Raiche, G. and Blais, J. G.} } @inbook {1960, title = {Constrained item selection using a stochastically curtailed SPRT}, year = {2009}, note = {{PDF File, 298 KB}{PDF File, 298 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Wouda, J. T. and Theo Eggen} } @article {2182, title = {Constraint-Weighted a-Stratification for Computerized Adaptive Testing With Nonstatistical Constraints}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {1}, year = {2009}, pages = {35-49}, abstract = {

a-stratification is a method that utilizes items with small discrimination (a) parameters early in an exam and those with higher a values when more is learned about the ability parameter. It can achieve much better item usage than the maximum information criterion (MIC). To make a-stratification more practical and more widely applicable, a method for weighting the item selection process in a-stratification as a means of satisfying multiple test constraints is proposed. This method is studied in simulation against an analogous method without stratification as well as a-stratification using descending- rather than ascending-a procedures. In addition, a variation of a-stratification that allows for unbalanced usage of a parameters is included in the study to examine the trade-off between efficiency and exposure control. Finally, MIC and randomized item selection are included as baseline measures. Results indicate that the weighting mechanism successfully addresses the constraints, that stratification helps to a great extent in balancing exposure rates, and that the ascending-a design improves measurement precision.

}, doi = {10.1177/0013164408322030}, url = {http://epm.sagepub.com/content/69/1/35.abstract}, author = {Ying Cheng, and Chang, Hua-Hua and Douglas, Jeffrey and Fanmin Guo,} } @article {507, title = {Constraint-weighted a-stratification for computerized adaptive testing with nonstatistical constraints: Balancing measurement efficiency and exposure control}, journal = {Educational and Psychological Measurement}, volume = {69}, year = {2009}, pages = {35-49}, author = {Cheng, Y and Chang, Hua-Hua and Douglas, J. and Guo, F.} } @inbook {1884, title = {Criterion-related validity of an innovative CAT-based personality measure}, year = {2009}, note = {{PDF File, 163 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {This paper describes development and initial criterion-related validation of the PreVisor Computer Adaptive Personality Scales (PCAPS), a computerized adaptive testing-based personality measure that uses an ideal point IRT model based on forced-choice, paired-comparison responses. Based on results from a large consortium study, a composite of six PCAPS scales identified as relevant to the population of interest (first-line supervisors) had an estimated operational validity against an overall job performance criterion of ρ = .25. Uncorrected and corrected criterion-related validity results for each of the six PCAPS scales making up the composite are also reported. Because the PCAPS algorithm computes intermediate scale scores until a stopping rule is triggered, we were able to graph number of statement-pairs presented against criterion-related validities. Results showed generally monotonically increasing functions. However, asymptotic validity levels, or at least a reduction in the rate of increase in slope, were often reached after 5-7 statement-pairs were presented. In the case of the composite measure, there was some evidence that validities decreased after about six statement-pairs. A possible explanation for this is provided.}, author = {Schneider, R. J. and McLellan, R. A. and Kantrowitz, T. M. and Houston, J. S. and Borman, W. C.} } @inbook {1954, title = {Developing item variants: An empirical study}, year = {2009}, note = {{PDF file, 194 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Large-scale standardized test have been widely used for educational and licensure testing. In computerized adaptive testing (CAT), one of the practical concerns for maintaining large-scale assessments is to ensure adequate numbers of high-quality items that are required for item pool functioning. Developing items at specific difficulty levels and for certain areas of test plans is a wellknown challenge. The purpose of this study was to investigate strategies for varying items that can effectively generate items at targeted difficulty levels and specific test plan areas. Each variant item generation model was developed by decomposing selected source items possessing ideal measurement properties and targeting the desirable content domains. 341 variant items were generated from 72 source items. Data were collected from six pretest periods. Items were calibrated using the Rasch model. Initial results indicate that variant items showed desirable measurement properties. Additionally, compared to an average of approximately 60\% of the items passing pretest criteria, an average of 84\% of the variant items passed the pretest criteria. }, author = {Wendt, A. 
and Kao, S. and Gorham, J. and Woo, A.} } @article {7, title = {Development and preliminary testing of a computerized adaptive assessment of chronic pain}, journal = {Journal of Pain}, volume = {10}, number = {9}, year = {2009}, note = {Anatchkova, Milena DSaris-Baglama, Renee NKosinski, MarkBjorner, Jakob B1R43AR052251-01A1/AR/NIAMS NIH HHS/United StatesEvaluation StudiesResearch Support, N.I.H., ExtramuralUnited StatesThe journal of pain : official journal of the American Pain SocietyJ Pain. 2009 Sep;10(9):932-43.}, month = {Sep}, pages = {932-943}, edition = {2009/07/15}, abstract = {The aim of this article is to report the development and preliminary testing of a prototype computerized adaptive test of chronic pain (CHRONIC PAIN-CAT) conducted in 2 stages: (1) evaluation of various item selection and stopping rules through real data-simulated administrations of CHRONIC PAIN-CAT; (2) a feasibility study of the actual prototype CHRONIC PAIN-CAT assessment system conducted in a pilot sample. Item calibrations developed from a US general population sample (N = 782) were used to program a pain severity and impact item bank (kappa = 45), and real data simulations were conducted to determine a CAT stopping rule. The CHRONIC PAIN-CAT was programmed on a tablet PC using QualityMetric{\textquoteright}s Dynamic Health Assessment (DYHNA) software and administered to a clinical sample of pain sufferers (n = 100). The CAT was completed in significantly less time than the static (full item bank) assessment (P < .001). On average, 5.6 items were dynamically administered by CAT to achieve a precise score. Scores estimated from the 2 assessments were highly correlated (r = .89), and both assessments discriminated across pain severity levels (P < .001, RV = .95). Patients{\textquoteright} evaluations of the CHRONIC PAIN-CAT were favorable. PERSPECTIVE: This report demonstrates that the CHRONIC PAIN-CAT is feasible for administration in a clinic. The application has the potential to improve pain assessment and help clinicians manage chronic pain.}, keywords = {*Computers, *Questionnaires, Activities of Daily Living, Adaptation, Psychological, Chronic Disease, Cohort Studies, Disability Evaluation, Female, Humans, Male, Middle Aged, Models, Psychological, Outcome Assessment (Health Care), Pain Measurement/*methods, Pain, Intractable/*diagnosis/psychology, Psychometrics, Quality of Life, User-Computer Interface}, isbn = {1528-8447 (Electronic)1526-5900 (Linking)}, author = {Anatchkova, M. D. and Saris-Baglama, R. N. and Kosinski, M. and Bjorner, J. B.} } @article {138, title = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using Rasch analysis}, journal = {Rehabilitation Psychology}, volume = {54}, number = {2}, year = {2009}, note = {Forkmann, ThomasBoecker, MarenNorra, ChristineEberle, NicoleKircher, TiloSchauerte, PatrickMischke, KarlWesthofen, MartinGauggel, SiegfriedWirtz, MarkusResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesRehabilitation psychologyRehabil Psychol. 2009 May;54(2):186-97.}, month = {May}, pages = {186-97}, edition = {2009/05/28}, abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. 
METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.}, keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role}, isbn = {0090-5550 (Print)0090-5550 (Linking)}, author = {Forkmann, T. and Boecker, M. and Norra, C. and Eberle, N. and Kircher, T. and Schauerte, P. and Mischke, K. and Westhofen, M. and Gauggel, S. and Wirtz, M.} } @article {2269, title = {Diagnostic classification models and multidimensional adaptive testing: A commentary on Rupp and Templin.}, journal = {Measurement: Interdisciplinary Research and Perspectives}, volume = {7}, year = {2009}, pages = {58-61}, author = {Frey, A. and Carstensen, C. H.} } @article {458, title = { Direct and inverse problems of item pool design for computerized adaptive testing}, journal = {Educational and Psychological Measurement}, volume = {69}, year = {2009}, pages = {533-547}, author = {Belov, D. I. and Armstrong, R. D.} } @article {2183, title = {Direct and Inverse Problems of Item Pool Design for Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {4}, year = {2009}, pages = {533-547}, abstract = {

The recent literature on computerized adaptive testing (CAT) has developed methods for creating CAT item pools from a large master pool. Each CAT pool is designed as a set of nonoverlapping forms reflecting the skill levels of an assumed population of test takers. This article presents a Monte Carlo method to obtain these CAT pools and discusses its advantages over existing methods. Also, a new problem is considered that finds a population ability density function best matching the master pool. An analysis of the solution to this new problem provides testing organizations with effective guidance for maintaining their master pools. Computer experiments with a pool of Law School Admission Test items and its assembly constraints are presented.

}, doi = {10.1177/0013164409332224}, url = {http://epm.sagepub.com/content/69/4/533.abstract}, author = {Belov, Dmitry I. and Armstrong, Ronald D.} } @inbook {1787, title = {Effect of early misfit in computerized adaptive testing on the recovery of theta}, year = {2009}, note = {{PDF File, 212 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Guyer, R. D. and Weiss, D. J.} } @article {2270, title = {Effekte des adaptiven Testens auf die Moti{\textlnot}vation zur Testbearbeitung [Effects of adaptive testing on test taking motivation].}, journal = {Diagnostica}, volume = {55}, year = {2009}, pages = {20-28}, author = {Frey, A. and Hartig, J. and Moosbrugger, H.} } @article {131, title = {Evaluation of a computer-adaptive test for the assessment of depression (D-CAT) in clinical application}, journal = {International Journal for Methods in Psychiatric Research}, volume = {18}, number = {1}, year = {2009}, note = {Journal articleInternational journal of methods in psychiatric researchInt J Methods Psychiatr Res. 2009 Feb 4.}, month = {Feb 4}, pages = {233-236}, edition = {2009/02/06}, abstract = {In the past, a German Computerized Adaptive Test, based on Item Response Theory (IRT), was developed for purposes of assessing the construct depression [Computer-adaptive test for depression (D-CAT)]. This study aims at testing the feasibility and validity of the real computer-adaptive application.The D-CAT, supplied by a bank of 64 items, was administered on personal digital assistants (PDAs) to 423 consecutive patients suffering from psychosomatic and other medical conditions (78 with depression). Items were adaptively administered until a predetermined reliability (r >/= 0.90) was attained. For validation purposes, the Hospital Anxiety and Depression Scale (HADS), the Centre for Epidemiological Studies Depression (CES-D) scale, and the Beck Depression Inventory (BDI) were administered. Another sample of 114 patients was evaluated using standardized diagnostic interviews [Composite International Diagnostic Interview (CIDI)].The D-CAT was quickly completed (mean 74 seconds), well accepted by the patients and reliable after an average administration of only six items. In 95\% of the cases, 10 items or less were needed for a reliable score estimate. Correlations between the D-CAT and the HADS, CES-D, and BDI ranged between r = 0.68 and r = 0.77. The D-CAT distinguished between diagnostic groups as well as established questionnaires do.The D-CAT proved an efficient, well accepted and reliable tool. Discriminative power was comparable to other depression measures, whereby the CAT is shorter and more precise. Item usage raises questions of balancing the item selection for content in the future. Copyright (c) 2009 John Wiley \& Sons, Ltd.}, isbn = {1049-8931 (Print)}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Rose, M. and Bjorner, J. B. and Klapp, B. F.} } @inbook {1800, title = {An evaluation of a new procedure for computing information functions for Bayesian scores from computerized adaptive tests}, year = {2009}, note = {{PDF file, 571 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Ito, K. 
and Pommerich, M and Segall, D.} } @article {227, title = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception}, journal = {Journal of Clinical Epidemiology}, volume = {62}, number = {3}, year = {2009}, note = {Kocalevent, Ruya-DanielaRose, MatthiasBecker, JanineWalter, Otto BFliege, HerbertBjorner, Jakob BKleiber, DieterKlapp, Burghard FEvaluation StudiesUnited StatesJournal of clinical epidemiologyJ Clin Epidemiol. 2009 Mar;62(3):278-87, 287.e1-3. Epub 2008 Jul 18.}, pages = {278-287}, edition = {2008/07/22}, abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.}, keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome}, isbn = {1878-5921 (Electronic)0895-4356 (Linking)}, author = {Kocalevent, R. D. and Rose, M. and Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kleiber, D. and Klapp, B. F.} } @inbook {1879, title = {An examination of decision-theory adaptive testing procedures}, year = {2009}, note = {{PDF file, 203 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {This research examined three ways to adaptively select items using decision theory: a traditional decision theory sequential testing approach (expected minimum cost), information gain (modeled after Kullback-Leibler), and a maximum discrimination approach, and then compared them all against an approach using maximum IRT Fisher information. It also examined the use of Wald{\textquoteright}s (1947) wellknown sequential probability ratio test, SPRT, as a test termination rule in this context. The minimum cost approach was notably better than the best-case possibility for IRT. Information gain, which is based on entropy and comes from information theory, was almost identical to minimum cost. 
The simple approach using the item that best discriminates between the two most likely classifications also fared better than IRT, but not as well as information gain or minimum cost. Through Wald{\textquoteright}s SPRT, large percentages of examinees can be accurately classified with very few items. With only 25 sequentially selected items, for example, approximately 90\% of the simulated NAEP examinees were classified with 86\% accuracy. The advantages of the decision theory model are many{\textemdash}the model yields accurate mastery state classifications, can use a small item pool, is simple to implement, requires little pretesting, is applicable to criterion-referenced tests, can be used in diagnostic testing, can be adapted to yield classifications on multiple skills, and should be easy to explain to non-statisticians.}, author = {Rudner, L. M.} } @inbook {1798, title = {Features of J-CAT (Japanese Computerized Adaptive Test)}, year = {2009}, note = {{PDF File, 655KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Imai, S. and Ito, S. and Nakamura, Y. and Kikuchi, K. and Akagi, Y. and Nakasono, H. and Honda, A. and Hiramura, T.} } @article {509, title = {Firestar: Computerized adaptive testing simulation program for polytomous IRT models}, journal = {Applied Psychological Measurement}, volume = {33}, year = {2009}, pages = {644{\textendash}645}, author = {Choi, S. W.} } @article {79, title = {Firestar: Computerized adaptive testing simulation program for polytomous IRT models}, journal = {Applied Psychological Measurement}, volume = {33}, number = {8}, year = {2009}, note = {U01 AR052177-04/NIAMS NIH HHS/United StatesJournal articleApplied psychological measurementAppl Psychol Meas. 2009 Nov 1;33(8):644-645.}, month = {Nov 1}, pages = {644-645}, edition = {2009/12/17}, isbn = {1552-3497 (Electronic)0146-6216 (Linking)}, author = {Choi, S. W.} } @inbook {1789, title = {A gradual maximum information ratio approach to item selection in computerized adaptive testing}, year = {2009}, note = {{PDF file, 391 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Han, K. T.} } @article {182, title = {Gradual maximum information ratio approach to item selection in computerized adaptive testing}, number = {RR-09-07}, year = {2009}, institution = {Graduate Management Admissions Council}, address = {McLean, VA. USA}, author = {Han, K. T.} } @inbook {1905, title = {Guess what? Score differences with rapid replies versus omissions on a computerized adaptive test}, year = {2009}, note = {{PDF File, 215 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Talento-Miller, E. and Guo, F.} } @inbook {1862, title = {A hybrid simulation procedure for the development of CATs}, year = {2009}, note = {{PDF File, 258 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Nydick, S. W. and Weiss, D. J.} } @article {330, title = {Item response theory and clinical measurement}, journal = {Annual Review of Clinical Psychology}, volume = {5}, year = {2009}, note = {Reise, Steven PWaller, Niels GU01 AR 52177/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralReviewUnited StatesAnnual review of clinical psychologyAnnu Rev Clin Psychol. 
2009;5:27-48.}, pages = {27-48}, edition = {2008/11/04}, abstract = {In this review, we examine studies that use item response theory (IRT) to explore the psychometric properties of clinical measures. Next, we consider how IRT has been used in clinical research for: scale linking, computerized adaptive testing, and differential item functioning analysis. Finally, we consider the scale properties of IRT trait scores. We conclude that there are notable differences between cognitive and clinical measures that have relevance for IRT modeling. Future research should be directed toward a better understanding of the metric of the latent trait and the psychological processes that lead to individual differences in item response behaviors.}, keywords = {*Psychological Theory, Humans, Mental Disorders/diagnosis/psychology, Psychological Tests, Psychometrics, Quality of Life, Questionnaires}, isbn = {1548-5951 (Electronic)}, author = {Reise, S. P. and Waller, N. G.} } @inbook {1766, title = {Item selection and hypothesis testing for the adaptive measurement of change}, year = {2009}, note = {{PDF File, 228 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Finkelman, M. and Weiss, D. J. and Kim-Kang, G.} } @article {388, title = {Item selection in computerized classification testing}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {5}, year = {2009}, pages = {778-793}, abstract = {Several alternatives for item selection algorithms based on item response theory in computerized classification testing (CCT) have been suggested, with no conclusive evidence on the substantial superiority of a single method. It is argued that the lack of sizable effect is because some of the methods actually assess items very similarly through different calculations and will usually select the same item. Consideration of methods that assess information across a wider range is often unnecessary under realistic conditions, although it might be advantageous to utilize them only early in a test. In addition, the efficiency of item selection approaches depend on the termination criteria that are used, which is demonstrated through didactic example and Monte Carlo simulation. Item selection at the cut score, which seems conceptually appropriate for CCT, is not always the most efficient option. A broad framework for item selection in CCT is presented that incorporates these points. }, isbn = {0013-1644}, author = {Thompson, N. A.} } @article {2184, title = {Item Selection in Computerized Classification Testing}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {5}, year = {2009}, pages = {778-793}, abstract = {

Several alternatives for item selection algorithms based on item response theory in computerized classification testing (CCT) have been suggested, with no conclusive evidence on the substantial superiority of a single method. It is argued that the lack of sizable effect is because some of the methods actually assess items very similarly through different calculations and will usually select the same item. Consideration of methods that assess information across a wider range is often unnecessary under realistic conditions, although it might be advantageous to utilize them only early in a test. In addition, the efficiency of item selection approaches depends on the termination criteria that are used, which is demonstrated through a didactic example and Monte Carlo simulation. Item selection at the cut score, which seems conceptually appropriate for CCT, is not always the most efficient option. A broad framework for item selection in CCT is presented that incorporates these points.

}, doi = {10.1177/0013164408324460}, url = {http://epm.sagepub.com/content/69/5/778.abstract}, author = {Thompson, Nathan A.} } @article {453, title = {Item selection rules in computerized adaptive testing: Accuracy and security}, journal = {Methodology}, volume = {5}, year = {2009}, note = {(PDF file, 445 KB) }, pages = {7-17}, author = {Barrada, J and Olea, J. and Ponsoda, V. and Abad, F. J.} } @inbook {1893, title = {Item selection with biased-coin up-and-down designs}, year = {2009}, note = {{PDF file, 748 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Sheng, Y. and Sheng, Z.} } @article {2156, title = {I{\textquoteright}ve Fallen and I Can{\textquoteright}t Get Up: Can High-Ability Students Recover From Early Mistakes in CAT?}, journal = {Applied Psychological Measurement}, volume = {33}, number = {2}, year = {2009}, pages = {83-101}, abstract = {

A difficult result to interpret in Computerized Adaptive Tests (CATs) occurs when an ability estimate initially drops and then ascends continuously until the test ends, suggesting that the true ability may be higher than implied by the final estimate. This study explains why this asymmetry occurs and shows that early mistakes by high-ability students can lead to considerable underestimation, even in tests with 45 items. The opposite response pattern, where low-ability students start with lucky guesses, leads to much less bias. The authors show that using Barton and Lord{\textquoteright}s four-parameter model (4PM) and a less informative prior can lower bias and root mean square error (RMSE) for high-ability students with a poor start, as the CAT algorithm ascends more quickly after initial underperformance. Results also show that the 4PM slightly outperforms a CAT in which less discriminating items are initially used. The practical implications and relevance for psychological measurement more generally are discussed.

}, doi = {10.1177/0146621608324023}, url = {http://apm.sagepub.com/content/33/2/83.abstract}, author = {Rulison, Kelly L. and Loken, Eric} } @article {691, title = {I{\textquoteright}ve fallen and I can{\textquoteright}t get up: can high-ability students recover from early mistakes in CAT?}, journal = {Applied Psychological Measurement}, volume = {33(2)}, year = {2009}, pages = {83-101}, author = {Rulison, K., and Loken, E.} } @article {2242, title = {A Knowledge-Based Approach for Item Exposure Control in Computerized Adaptive Testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {34}, number = {4}, year = {2009}, pages = {530-558}, abstract = {

The purpose of this study is to investigate a functional relation between item exposure parameters (IEPs) and item parameters (IPs) over parallel pools. This functional relation is approximated by a well-known tool in machine learning. Let P and Q be parallel item pools and suppose IEPs for P have been obtained via a Sympson and Hetter-type simulation. Based on these simulated parameters, a functional relation k = f_P(a, b, c) relating IPs to IEPs of P is obtained by an artificial neural network and used to estimate IEPs of Q without tedious simulation. Extensive experiments using real and synthetic pools showed that this approach worked well for many variants of the Sympson and Hetter procedure. It worked excellently for the conditional Stocking and Lewis multinomial selection procedure and the Chen and Lei item exposure and test overlap control procedure. This study provides the first step in an alternative means to estimate IEPs without iterative simulation.

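A minimal sketch of the general idea, assuming scikit-learn is available (the pool values, network size, and variable names are invented for illustration and are not the author's implementation): fit a small regressor to the (a, b, c) -> k mapping obtained for pool P, then predict exposure parameters for a parallel pool Q without rerunning the iterative simulation.

    import numpy as np
    from sklearn.neural_network import MLPRegressor

    # (a, b, c) item parameters of pool P and the exposure-control parameters k
    # obtained for them from a Sympson and Hetter-type simulation (made-up values).
    X_P = np.array([[1.2, -0.8, 0.20], [0.9, 0.1, 0.25], [1.6, 0.7, 0.15], [0.7, -1.5, 0.22]])
    k_P = np.array([0.61, 0.85, 0.42, 0.97])

    # Approximate k = f_P(a, b, c) with a small neural network ...
    net = MLPRegressor(hidden_layer_sizes=(8,), max_iter=5000, random_state=0)
    net.fit(X_P, k_P)

    # ... and reuse the fitted function to estimate exposure parameters for a
    # parallel pool Q without a new Sympson and Hetter simulation.
    X_Q = np.array([[1.1, -0.5, 0.18], [1.4, 0.9, 0.20]])
    print(net.predict(X_Q))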
}, doi = {10.3102/1076998609336667}, url = {http://jeb.sagepub.com/cgi/content/abstract/34/4/530}, author = {Doong, S. H.} } @inbook {1941, title = {Kullback-Leibler information in multidimensional adaptive testing: theory and application}, year = {2009}, note = {{PDF file, 316 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Built on multidimensional item response theory (MIRT), multidimensional adaptive testing (MAT) can, in principle, provide a promising approach to ensuring efficient estimation of each ability dimension in a multidimensional vector. Currently, two item selection procedures have been developed for MAT, one based on Fisher information embedded within a Bayesian framework, and the other powered by Kullback-Leibler (KL) information. It is well known in unidimensional IRT that the second derivative of KL information (also termed {\textquotedblleft}global information{\textquotedblright}) is Fisher information evaluated at θ0. This paper first generalizes the relationship between these two types of information in two ways{\textemdash}the analytical result is given as well as the graphical representation, to enhance interpretation and understanding. Second, a KL information index is constructed for MAT, which represents the integration of KL information over all of the ability dimensions. This paper further discusses how this index correlates with the item discrimination parameters. The analytical results would lay the foundation for future development of item selection methods in MAT, which can help equalize the item exposure rate. Finally, a simulation study is conducted to verify the above results. The connection between the item parameters, item KL information, and item exposure rate is demonstrated for empirical MAT delivered by an item bank calibrated under two-dimensional IRT.}, author = {Wang, C. and Chang, Hua-Hua} } @inbook {1822, title = {Limiting item exposure for target difficulty ranges in a high-stakes CAT}, year = {2009}, note = {{PDF File, 1. MB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Li, X. and Becker, K. and Gorham, J. and Woo, A.} } @article {342, title = {Logistics of collecting patient-reported outcomes (PROs) in clinical practice: an overview and practical examples}, journal = {Quality of Life Research}, volume = {18}, number = {1}, year = {2009}, note = {Rose, MatthiasBezjak, AndreaNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2009 Feb;18(1):125-36. Epub 2009 Jan 20.}, month = {Feb}, pages = {125-36}, edition = {2009/01/20}, abstract = {PURPOSE: Interest in collecting patient-reported outcomes (PROs), such as health-related quality of life (HRQOL), health status reports, and patient satisfaction, is on the rise and practical aspects of collecting PROs in clinical practice are becoming more important. The purpose of this paper is to draw attention to a number of issues relevant for a successful integration of PRO measures into the daily work flow of busy clinical settings. METHODS: The paper summarizes the results from a breakout session held at an ISOQOL special topic conference for PRO measures in clinical practice in 2007. RESULTS: Different methodologies of collecting PROs are discussed, and the support needed for each methodology is highlighted.
The discussion is illustrated by practical real-life examples from early adaptors who administered paper-pencil, or electronic PRO assessments (ePRO) for more than a decade. The paper also reports about new experiences with more recent technological developments, such as SmartPens and Computer Adaptive Tests (CATs) in daily practice. CONCLUSIONS: Methodological and logistical issues determine the resources needed for a successful integration of PRO measures into daily work flow procedures and influence significantly the usefulness of PRO data for clinical practice.}, isbn = {0962-9343 (Print)}, author = {Rose, M. and Bezjak, A.} } @article {77, title = {The maximum priority index method for severely constrained item selection in computerized adaptive testing}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {62}, number = {2}, year = {2009}, note = {Cheng, YingChang, Hua-HuaResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2009 May;62(Pt 2):369-83. Epub 2008 Jun 2.}, month = {May}, pages = {369-83}, edition = {2008/06/07}, abstract = {This paper introduces a new heuristic approach, the maximum priority index (MPI) method, for severely constrained item selection in computerized adaptive testing. Our simulation study shows that it is able to accommodate various non-statistical constraints simultaneously, such as content balancing, exposure control, answer key balancing, and so on. Compared with the weighted deviation modelling method, it leads to fewer constraint violations and better exposure control while maintaining the same level of measurement precision.}, keywords = {Aptitude Tests/*statistics \& numerical data, Diagnosis, Computer-Assisted/*statistics \& numerical data, Educational Measurement/*statistics \& numerical data, Humans, Mathematical Computing, Models, Statistical, Personality Tests/*statistics \& numerical data, Psychometrics/*statistics \& numerical data, Reproducibility of Results, Software}, isbn = {0007-1102 (Print)0007-1102 (Linking)}, author = {Cheng, Y and Chang, Hua-Hua} } @article {170, title = {Measuring global physical health in children with cerebral palsy: Illustration of a multidimensional bi-factor model and computerized adaptive testing}, journal = {Quality of Life Research}, volume = {18}, number = {3}, year = {2009}, note = {Haley, Stephen MNi, PengshengDumas, Helene MFragala-Pinkham, Maria AHambleton, Ronald KMontpetit, KathleenBilodeau, NathalieGorton, George EWatson, KyleTucker, Carole AK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesK02 HD45354-01A1/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}tNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2009 Apr;18(3):359-70. Epub 2009 Feb 17.}, month = {Apr}, pages = {359-370}, edition = {2009/02/18}, abstract = {PURPOSE: The purposes of this study were to apply a bi-factor model for the determination of test dimensionality and a multidimensional CAT using computer simulations of real data for the assessment of a new global physical health measure for children with cerebral palsy (CP). METHODS: Parent respondents of 306 children with cerebral palsy were recruited from four pediatric rehabilitation hospitals and outpatient clinics. 
We compared confirmatory factor analysis results across four models: (1) one-factor unidimensional; (2) two-factor multidimensional (MIRT); (3) bi-factor MIRT with fixed slopes; and (4) bi-factor MIRT with varied slopes. We tested whether the general and content (fatigue and pain) person score estimates could discriminate across severity and types of CP, and whether score estimates from a simulated CAT were similar to estimates based on the total item bank, and whether they correlated as expected with external measures. RESULTS: Confirmatory factor analysis suggested separate pain and fatigue sub-factors; all 37 items were retained in the analyses. From the bi-factor MIRT model with fixed slopes, the full item bank scores discriminated across levels of severity and types of CP, and compared favorably to external instruments. CAT scores based on 10- and 15-item versions accurately captured the global physical health scores. CONCLUSIONS: The bi-factor MIRT CAT application, especially the 10- and 15-item versions, yielded accurate global physical health scores that discriminated across known severity groups and types of CP, and correlated as expected with concurrent measures. The CATs have potential for collecting complex data on the physical health of children with CP in an efficient manner.}, keywords = {*Computer Simulation, *Health Status, *Models, Statistical, Adaptation, Psychological, Adolescent, Cerebral Palsy/*physiopathology, Child, Child, Preschool, Factor Analysis, Statistical, Female, Humans, Male, Massachusetts, Pennsylvania, Questionnaires, Young Adult}, isbn = {0962-9343 (Print)0962-9343 (Linking)}, author = {Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A. and Hambleton, R. K. and Montpetit, K. and Bilodeau, N. and Gorton, G. E. and Watson, K. and Tucker, C. A.} } @inbook {1911, title = {The MEDPRO project: An SBIR project for a comprehensive IRT and CAT software system: CAT software}, year = {2009}, note = {{PDF File, 283 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Development of computerized adaptive tests (CAT) requires a number of appropriate software tools. This paper describes the development of two new CAT software programs. CATSIM has been designed specifically to conduct several different kinds of simulation studies, which are necessary for planning purposes as well as properly designing live CATs. FastCAT is a software system for banking items and publishing CAT tests as standalone files, to be administered anywhere. Both are available for public use.}, author = {Thompson, N. A.} } @inbook {1908, title = {The MEDPRO project: An SBIR project for a comprehensive IRT and CAT software system: IRT software}, year = {2009}, note = {PDF File, 817 K}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {IRTPRO (Item Response Theory for Patient-Reported Outcomes) is an entirely new application for item calibration and test scoring using IRT. IRTPRO implements algorithms for maximum likelihood estimation of item parameters (item calibration) for several unidimensional and multidimensional item response theory (IRT) models for dichotomous and polytomous item responses. In addition, the software provides computation of goodness-of-fit indices, statistics for the diagnosis of local dependence and for the detection of differential item functioning (DIF), and IRT scaled scores. 
This paper illustrates the use, and some capabilities, of the software.}, author = {Thissen, D.} } @article {112, title = {A mixed integer programming model for multiple stage adaptive testing}, journal = {European Journal of Operational Research}, volume = {193}, number = {2}, year = {2009}, note = {doi: DOI: 10.1016/j.ejor.2007.10.047}, pages = {342-350}, abstract = {The last decade has seen paper-and-pencil (P\&P) tests being replaced by computerized adaptive tests (CATs) within many testing programs. A CAT may yield several advantages relative to a conventional P\&P test. A CAT can determine the questions or test items to administer, allowing each test form to be tailored to a test taker{\textquoteright}s skill level. Subsequent items can be chosen to match the capability of the test taker. By adapting to a test taker{\textquoteright}s ability, a CAT can acquire more information about a test taker while administering fewer items. A Multiple Stage Adaptive test (MST) provides a means to implement a CAT that allows review before the administration. The MST format is a hybrid between the conventional P\&P and CAT formats. This paper presents mixed integer programming models for MST assembly problems. Computational results with commercial optimization software will be given and advantages of the models evaluated.}, keywords = {Education, Integer programming, Linear programming}, isbn = {0377-2217}, author = {Edmonds, J. and Armstrong, R. D.} } @article {140, title = {Multidimensional adaptive testing in educational and psychological measurement: Current state and future challenges}, journal = {Studies in Educational Evaluation}, volume = {35}, number = {2-3}, year = {2009}, pages = {89-94}, abstract = {The paper gives an overview of multidimensional adaptive testing (MAT) and evaluates its applicability in educational and psychological testing. The approach of Segall (1996) is described as a general framework for MAT. The main advantage of MAT is its capability to increase measurement efficiency. In simulation studies conceptualizing situations typical to large scale assessments, the number of presented items was reduced by MAT by about 30{\textendash}50\% compared to unidimensional adaptive testing and by about 70\% compared to fixed item testing holding measurement precision constant. Empirical results underline these findings. Before MAT is used routinely some open questions should be answered first. After that, MAT represents a very promising approach to highly efficient simultaneous testing of multiple competencies.}, isbn = {0191491X}, author = {Frey, A. and Seitz, N-N.} } @article {288, title = {Multidimensional Adaptive Testing with Optimal Design Criteria for Item Selection}, journal = {Psychometrika}, volume = {74}, number = {2}, year = {2009}, note = {Journal articlePsychometrikaPsychometrika. 2009 Jun;74(2):273-296. Epub 2008 Dec 23.}, month = {Jun}, pages = {273-296}, edition = {2010/02/02}, abstract = {Several criteria from the optimal design literature are examined for use with item selection in multidimensional adaptive testing. In particular, it is examined what criteria are appropriate for adaptive testing in which all abilities are intentional, some should be considered as a nuisance, or the interest is in the testing of a composite of the abilities. 
Both the theoretical analyses and the studies of simulated data in this paper suggest that the criteria of A-optimality and D-optimality lead to the most accurate estimates when all abilities are intentional, with the former slightly outperforming the latter. The criterion of E-optimality showed occasional erratic behavior for this case of adaptive testing, and its use is not recommended. If some of the abilities are nuisances, application of the criterion of A(s)-optimality (or D(s)-optimality), which focuses on the subset of intentional abilities, is recommended. For the measurement of a linear combination of abilities, the criterion of c-optimality yielded the best results. The preferences of each of these criteria for items with specific patterns of parameter values were also assessed. It was found that the criteria differed mainly in their preferences for items with different patterns of values for their discrimination parameters.}, isbn = {0033-3123 (Print)0033-3123 (Linking)}, author = {Mulder, J. and van der Linden, W. J.} } @article {2154, title = {Multiple Maximum Exposure Rates in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {33}, number = {1}, year = {2009}, pages = {58-73}, abstract = {

Computerized adaptive testing is subject to security problems, as the item bank content remains operative over long periods and administration time is flexible for examinees. Spreading the content of a part of the item bank could lead to an overestimation of the examinees{\textquoteright} trait level. The most common way of reducing this risk is to impose a maximum exposure rate (rmax) that no item should exceed. Several methods have been proposed with this aim. All of these methods establish a single value of rmax throughout the test. This study presents a new method, the multiple-rmax method, that defines as many values of rmax as the number of items presented in the test. In this way, it is possible to impose a high degree of randomness in item selection at the beginning of the test, leaving the administration of items with the best psychometric properties to the moment when the trait level estimation is most accurate. The implementation of the multiple-rmax method is described and is tested in simulated item banks and in an operative bank. Compared with a single maximum exposure method, the new method has a more balanced usage of the item bank and delays the possible distortion of trait estimation due to security problems, with either no or only slight decrements of measurement accuracy.

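One simplified way to picture the position-specific ceilings described above (the ceiling values, exposure rates, and function name are invented for illustration; this is not the authors' algorithm): an item remains eligible at a given position only while its running exposure rate stays below the rmax defined for that position, so early positions tolerate much more exposure, and hence more randomness, than late ones.

    import numpy as np

    def eligible_items(exposure_rates, position, r_max_by_position):
        # Indices of items whose running exposure rate is still below the
        # maximum exposure rate allowed at this position of the test.
        return np.flatnonzero(exposure_rates < r_max_by_position[position])

    # Loose ceilings early in the test (more randomness in selection),
    # strict ceilings later, when the trait estimate is already accurate.
    r_max = np.array([0.90, 0.70, 0.50, 0.30, 0.20])
    exposure = np.array([0.65, 0.25, 0.45, 0.10])  # running exposure rate per item

    print(eligible_items(exposure, position=0, r_max_by_position=r_max))  # [0 1 2 3]
    print(eligible_items(exposure, position=4, r_max_by_position=r_max))  # [3]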
}, doi = {10.1177/0146621608315329}, url = {http://apm.sagepub.com/content/33/1/58.abstract}, author = {Barrada, Juan Ram{\'o}n and Veldkamp, Bernard P. and Olea, Julio} } @inbook {1868, title = {The nine lives of CAT-ASVAB: Innovations and revelations}, year = {2009}, note = {{PDF File, 169 KB}}, address = {In D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {The Armed Services Vocational Aptitude Battery (ASVAB) is administered annually to more than one million military applicants and high school students. ASVAB scores are used to determine enlistment eligibility, assign applicants to military occupational specialties, and aid students in career exploration. The ASVAB is administered as both a paper-and-pencil (P\&P) test and a computerized adaptive test (CAT). CAT-ASVAB holds the distinction of being the first large-scale adaptive test battery to be administered in a high-stakes setting. Approximately two-thirds of military applicants currently take CAT-ASVAB; long-term plans are to replace P\&P-ASVAB with CAT-ASVAB at all test sites. Given CAT-ASVAB{\textquoteright}s pedigree{\textemdash}approximately 20 years in development and 20 years in operational administration{\textemdash}much can be learned from revisiting some of the major highlights of CAT-ASVAB history. This paper traces the progression of CAT-ASVAB through nine major phases of development including: research and development of the CAT-ASVAB prototype, the initial development of psychometric procedures and item pools, initial and full-scale operational implementation, the introduction of new item pools, the introduction of Windows administration, the introduction of Internet administration, and research and development of the next-generation CAT-ASVAB. A background and history is provided for each phase, including discussions of major research and operational issues, innovative approaches and practices, and lessons learned.}, author = {Pommerich, M and Segall, D. O. and Moreno, K. E.} } @inbook {1942, title = {Obtaining reliable diagnostic information through constrained CAT}, year = {2009}, note = {{PDF File, 252 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Wang, C. and Chang, Hua-Hua and Douglas, J.} } @inbook {1741, title = {Optimizing item exposure control algorithms for polytomous computerized adaptive tests with restricted item banks}, year = {2009}, note = {{PDF File, 923 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Chajewski, M. and Lewis, C.} } @article {673, title = {a posteriori estimation in adaptive testing: Adaptive a priori, adaptive correction for bias, and adaptive integration interval}, journal = {Journal of Applied Measurement}, volume = {10(2)}, year = {2009}, author = {Ra{\^\i}che, G. and Blais, J-G.} } @inbook {1797, title = {Practical issues concerning the application of the DINA model to CAT data}, year = {2009}, note = {{PDF file, 139 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Huebner, A. and Wang, B. and Lee, S.} } @article {2153, title = {Predictive Control of Speededness in Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {33}, number = {1}, year = {2009}, pages = {25-41}, abstract = {

An adaptive testing method is presented that controls the speededness of a test using predictions of the test takers{\textquoteright} response times on the candidate items in the pool. Two different types of predictions are investigated: posterior predictions given the actual response times on the items already administered and posterior predictions that use the responses on these items as an additional source of information. In a simulation study with an adaptive test modeled after a test from the Armed Services Vocational Aptitude Battery, the effectiveness of the methods in removing differential speededness from the test was evaluated.

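As a rough illustration of the ingredients involved (this is not the article's posterior-prediction machinery; the parameter values and names are invented), a lognormal response-time model gives each candidate item an expected response time that can be checked against the time remaining for the examinee.

    import numpy as np

    def expected_time(beta, tau, alpha):
        # Lognormal response-time model: log T ~ Normal(beta - tau, 1 / alpha^2),
        # so the expected time is exp(beta - tau + 1 / (2 alpha^2)).
        return np.exp(beta - tau + 0.5 / alpha ** 2)

    # Time intensity (beta) and time discrimination (alpha) of candidate items,
    # and a provisional estimate of the examinee's speed parameter tau.
    betas = np.array([4.0, 4.3, 3.8])
    alphas = np.array([2.0, 1.5, 2.5])
    tau_hat = 0.2

    predicted = expected_time(betas, tau_hat, alphas)   # seconds per item
    remaining_budget = 60.0                             # seconds left for this examinee
    eligible = predicted <= remaining_budget            # items the examinee can be expected to finish
    print(predicted, eligible)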
}, doi = {10.1177/0146621607314042}, url = {http://apm.sagepub.com/content/33/1/25.abstract}, author = {van der Linden, Wim J.} } @article {143, title = {Progress in assessing physical function in arthritis: PROMIS short forms and computerized adaptive testing}, journal = {Journal of Rheumatology}, volume = {36}, number = {9}, year = {2009}, note = {Fries, James FCella, DavidRose, MatthiasKrishnan, EswarBruce, BonnieU01 AR052158/AR/NIAMS NIH HHS/United StatesU01 AR52177/AR/NIAMS NIH HHS/United StatesConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 2009 Sep;36(9):2061-6.}, month = {Sep}, pages = {2061-2066}, edition = {2009/09/10}, abstract = {OBJECTIVE: Assessing self-reported physical function/disability with the Health Assessment Questionnaire Disability Index (HAQ) and other instruments has become central in arthritis research. Item response theory (IRT) and computerized adaptive testing (CAT) techniques can increase reliability and statistical power. IRT-based instruments can improve measurement precision substantially over a wider range of disease severity. These modern methods were applied and the magnitude of improvement was estimated. METHODS: A 199-item physical function/disability item bank was developed by distilling 1865 items to 124, including Legacy Health Assessment Questionnaire (HAQ) and Physical Function-10 items, and improving precision through qualitative and quantitative evaluation in over 21,000 subjects, which included about 1500 patients with rheumatoid arthritis and osteoarthritis. Four new instruments, (A) Patient-Reported Outcomes Measurement Information (PROMIS) HAQ, which evolved from the original (Legacy) HAQ; (B) "best" PROMIS 10; (C) 20-item static (short) forms; and (D) simulated PROMIS CAT, which sequentially selected the most informative item, were compared with the HAQ. RESULTS: Online and mailed administration modes yielded similar item and domain scores. The HAQ and PROMIS HAQ 20-item scales yielded greater information content versus other scales in patients with more severe disease. The "best" PROMIS 20-item scale outperformed the other 20-item static forms over a broad range of 4 standard deviations. The 10-item simulated PROMIS CAT outperformed all other forms. CONCLUSION: Improved items and instruments yielded better information. The PROMIS HAQ is currently available and considered validated. The new PROMIS short forms, after validation, are likely to represent further improvement. CAT-based physical function/disability assessment offers superior performance over static forms of equal length.}, keywords = {*Disability Evaluation, *Outcome Assessment (Health Care), Arthritis/diagnosis/*physiopathology, Health Surveys, Humans, Prognosis, Reproducibility of Results}, isbn = {0315-162X (Print)0315-162X (Linking)}, author = {Fries, J.F. and Cella, D. and Rose, M. and Krishnan, E. and Bruce, B.} } @booklet {1480, title = {Proposta para a construo de um Teste Adaptativo Informatizado baseado na Teoria da Resposta ao Item (Proposal for the construction of a Computerized Adaptive Testing based on the Item Response Theory)}, year = {2009}, note = {(In Portguese)}, address = {Poster session presented at the Congresso Brasileiro de Teoria da Resposta ao Item, Florianpolis SC Brazil}, author = {Moreira Junior, F. J. and Andrade, D. F.} } @inbook {1786, title = {Quantifying the impact of compromised items in CAT}, year = {2009}, note = {{PDF File, 438 KB}}, address = {D. J. 
Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Guo, F.} } @article {78, title = {Reduction in patient burdens with graphical computerized adaptive testing on the ADL scale: tool development and simulation}, journal = {Health and Quality of Life Outcomes}, volume = {7}, year = {2009}, note = {Chien, Tsair-WeiWu, Hing-ManWang, Weng-ChungCastillo, Roberto VasquezChou, WillyComparative StudyValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2009 May 5;7:39.}, pages = {39}, edition = {2009/05/07}, abstract = {BACKGROUND: The aim of this study was to verify the effectiveness and efficacy of saving time and reducing burden for patients, nurses, and even occupational therapists through computer adaptive testing (CAT). METHODS: Based on an item bank of the Barthel Index (BI) and the Frenchay Activities Index (FAI) for assessing comprehensive activities of daily living (ADL) function in stroke patients, we developed a visual basic application (VBA)-Excel CAT module, and (1) investigated whether the averaged test length via CAT is shorter than that of the traditional all-item-answered non-adaptive testing (NAT) approach through simulation, (2) illustrated the CAT multimedia on a tablet PC showing data collection and response errors of ADL clinical functional measures in stroke patients, and (3) demonstrated the quality control of endorsing scale with fit statistics to detect responding errors, which will be further immediately reconfirmed by technicians once patient ends the CAT assessment. RESULTS: The results show that endorsed items could be shorter on CAT (M = 13.42) than on NAT (M = 23) at 41.64\% efficiency in test length. However, averaged ability estimations reveal insignificant differences between CAT and NAT. CONCLUSION: This study found that mobile nursing services, placed at the bedsides of patients could, through the programmed VBA-Excel CAT module, reduce the burden to patients and save time, more so than the traditional NAT paper-and-pencil testing appraisals.}, keywords = {*Activities of Daily Living, *Computer Graphics, *Computer Simulation, *Diagnosis, Computer-Assisted, Female, Humans, Male, Point-of-Care Systems, Reproducibility of Results, Stroke/*rehabilitation, Taiwan, United States}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Chien, T. W. and Wu, H. M. and Wang, W-C. and Castillo, R. V. and Chou, W.} } @article {173, title = {Replenishing a computerized adaptive test of patient-reported daily activity functioning}, journal = {Quality of Life Research}, volume = {18}, number = {4}, year = {2009}, note = {Haley, Stephen MNi, PengshengJette, Alan MTao, WeiMoed, RichardMeyers, DougLudlow, Larry HK02 HD45354-01/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2009 May;18(4):461-71. Epub 2009 Mar 14.}, month = {May}, pages = {461-71}, edition = {2009/03/17}, abstract = {PURPOSE: Computerized adaptive testing (CAT) item banks may need to be updated, but before new items can be added, they must be linked to the previous CAT. The purpose of this study was to evaluate 41 pretest items prior to including them into an operational CAT. 
METHODS: We recruited 6,882 patients with spine, lower extremity, upper extremity, and nonorthopedic impairments who received outpatient rehabilitation in one of 147 clinics across 13 states of the USA. Forty-one new Daily Activity (DA) items were administered along with the Activity Measure for Post-Acute Care Daily Activity CAT (DA-CAT-1) in five separate waves. We compared the scoring consistency with the full item bank, test information function (TIF), person standard errors (SEs), and content range of the DA-CAT-1 to the new CAT (DA-CAT-2) with the pretest items by real data simulations. RESULTS: We retained 29 of the 41 pretest items. Scores from the DA-CAT-2 were more consistent (ICC = 0.90 versus 0.96) than DA-CAT-1 when compared with the full item bank. TIF and person SEs were improved for persons with higher levels of DA functioning, and ceiling effects were reduced from 16.1\% to 6.1\%. CONCLUSIONS: Item response theory and online calibration methods were valuable in improving the DA-CAT.}, keywords = {*Activities of Daily Living, *Disability Evaluation, *Questionnaires, *User-Computer Interface, Adult, Aged, Cohort Studies, Computer-Assisted Instruction, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods}, isbn = {0962-9343 (Print)0962-9343 (Linking)}, author = {Haley, S. M. and Ni, P. and Jette, A. M. and Tao, W. and Moed, R. and Meyers, D. and Ludlow, L. H.} } @article {2185, title = {Studying the Equivalence of Computer-Delivered and Paper-Based Administrations of the Raven Standard Progressive Matrices Test}, journal = {Educational and Psychological Measurement}, volume = {69}, number = {5}, year = {2009}, pages = {855-867}, abstract = {

This study investigates the effect of mode of administration of the Raven Standard Progressive Matrices test on distribution, accuracy, and meaning of raw scores. A random sample of high school students take counterbalanced paper-and-pencil and computer-based administrations of the test and answer a questionnaire surveying preferences for computer-delivered test administrations. Administration mode effect is studied with repeated measures multivariate analysis of variance, internal consistency reliability estimates, and confirmatory factor analysis approaches. Results show a lack of test mode effect on distribution, accuracy, and meaning of raw scores. Participants indicate their preferences for the computer-delivered administration of the test. The article discusses findings in light of previous studies of the Raven Standard Progressive Matrices test.

}, doi = {10.1177/0013164409332219}, url = {http://epm.sagepub.com/content/69/5/855.abstract}, author = {Arce-Ferrer, Alvaro J. and Mart{\'\i}nez Guzm{\'a}n, Elvira} } @inbook {1729, title = {Termination criteria in computerized adaptive tests: Variable-length CATs are not biased.}, year = {2009}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Babcock, B. and Weiss, D. J.} } @inbook {1731, title = {Test overlap rate and item exposure rate as indicators of test security in CATs}, year = {2009}, note = {PDF File, 261 K}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Barrada, J and Olea, J. and Ponsoda, V. and Abad, F. J.} } @inbook {1817, title = {Using automatic item generation to address item demands for CAT}, year = {2009}, note = {{PDF File, 320 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, author = {Lai, H. and Alves, C. and Gierl, M. J.} } @inbook {1912, title = {Utilizing the generalized likelihood ratio as a termination criterion}, year = {2009}, note = {{PDF File, 194 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2009 GMAC Conference on Computerized Adaptive Testing.}, abstract = {Computer-based testing can be used to classify examinees into mutually exclusive groups. Currently, the predominant psychometric algorithm for designing computerized classification tests (CCTs) is the sequential probability ratio test (SPRT; Reckase, 1983) based on item response theory (IRT). The SPRT has been shown to be more efficient than confidence intervals around θ estimates as a method for CCT delivery (Spray \& Reckase, 1996; Rudner, 2002). More recently, it was demonstrated that the SPRT, which only uses fixed values, is less efficient than a generalized form which tests whether a given examinee{\textquoteright}s θ is below θ1 or above θ2 (Thompson, 2007). This formulation allows the indifference region to vary based on observed data. Moreover, this composite hypothesis formulation better represents the conceptual purpose of the test, which is to test whether θ is above or below the cutscore. The purpose of this study was to explore the specifications of the new generalized likelihood ratio (GLR; Huang, 2004). As with the SPRT, the efficiency of the procedure depends on the nominal error rates and the distance between θ1 and θ2 (Eggen, 1999). This study utilized a Monte Carlo approach, with 10,000 examinees simulated under each condition, to evaluate differences in efficiency and accuracy due to hypothesis structure, nominal error rate, and indifference region size. The GLR was always at least as efficient as the fixed-point SPRT while maintaining equivalent levels of accuracy.}, author = {Thompson, N. A.} } @article {136, title = {Validation of the MMPI-2 computerized adaptive version (MMPI-2-CA) in a correctional intake facility}, journal = {Psychological Services}, volume = {6}, number = {4}, year = {2009}, pages = {279-292}, abstract = {Computerized adaptive testing in personality assessment can improve efficiency by significantly reducing the number of items administered to answer an assessment question. The time savings afforded by this technique could be of particular benefit in settings where large numbers of psychological screenings are conducted, such as correctional facilities.
In the current study, item and time savings, as well as the test{\textendash}retest and extratest correlations associated with an audio augmented administration of all the scales of the Minnesota Multiphasic Personality Inventory (MMPI)-2 Computerized Adaptive (MMPI-2-CA) are reported. Participants include 366 men, ages 18 to 62 years (M = 33.04, SD = 10.40), undergoing intake into a large Midwestern state correctional facility. Results of the current study indicate considerable item and corresponding time savings for the MMPI-2-CA compared to conventional administration of the test, as well as comparability in terms of test{\textendash}retest and correlations with external measures. Future directions of adaptive personality testing are discussed.}, isbn = {1939-148X}, author = {Forbey, J. D. and Ben-Porath, Y. S. and Gartland, D.} } @article {76, title = {When cognitive diagnosis meets computerized adaptive testing: CD-CAT}, journal = {Psychometrika}, volume = {74}, number = {4}, year = {2009}, pages = {619-632}, author = {Cheng, Y} } @article {590, title = {Adaptive measurement of individual change}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, note = {{PDF file, 568 KB}}, pages = {49-58}, doi = {10.1027/0044-3409.216.1.49}, author = {Kim-Kang, G. and Weiss, D. J.} } @article {742, title = {Adaptive models of psychological testing}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216(1)}, year = {2008}, pages = {3{\textendash}11}, author = {van der Linden, W. J.} } @article {2104, title = {Adaptive Models of Psychological Testing}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, pages = {1-2}, doi = {10.1027/0044-3409.216.1.49}, author = {van der Linden, W. J.} } @article {212, title = {Adaptive short forms for outpatient rehabilitation outcome assessment}, journal = {American Journal of Physical Medicine and Rehabilitation}, volume = {87}, number = {10}, year = {2008}, note = {Jette, Alan MHaley, Stephen MNi, PengshengMoed, RichardK02 HD45354-01/HD/NICHD NIH HHS/United StatesR01 HD43568/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesAmerican journal of physical medicine \& rehabilitation / Association of Academic PhysiatristsAm J Phys Med Rehabil. 2008 Oct;87(10):842-52.}, month = {Oct}, pages = {842-52}, edition = {2008/09/23}, abstract = {OBJECTIVE: To develop outpatient Adaptive Short Forms for the Activity Measure for Post-Acute Care item bank for use in outpatient therapy settings. DESIGN: A convenience sample of 11,809 adults with spine, lower limb, upper limb, and miscellaneous orthopedic impairments who received outpatient rehabilitation in 1 of 127 outpatient rehabilitation clinics in the United States. We identified optimal items for use in developing outpatient Adaptive Short Forms based on the Basic Mobility and Daily Activities domains of the Activity Measure for Post-Acute Care item bank. Patient scores were derived from the Activity Measure for Post-Acute Care computerized adaptive testing program. Items were selected for inclusion on the Adaptive Short Forms based on functional content, range of item coverage, measurement precision, item exposure rate, and data collection burden. 
RESULTS: Two outpatient Adaptive Short Forms were developed: (1) an 18-item Basic Mobility Adaptive Short Form and (2) a 15-item Daily Activities Adaptive Short Form, derived from the same item bank used to develop the Activity Measure for Post-Acute Care computerized adaptive testing program. Both Adaptive Short Forms achieved acceptable psychometric properties. CONCLUSIONS: In outpatient postacute care settings where computerized adaptive testing outcome applications are currently not feasible, item response theory-derived Adaptive Short Forms provide the efficient capability to monitor patients{\textquoteright} functional outcomes. The development of Adaptive Short Form functional outcome instruments linked by a common, calibrated item bank has the potential to create a bridge to outcome monitoring across postacute care settings and can facilitate the eventual transformation from Adaptive Short Forms to computerized adaptive testing applications easier and more acceptable to the rehabilitation community.}, keywords = {*Activities of Daily Living, *Ambulatory Care Facilities, *Mobility Limitation, *Treatment Outcome, Disabled Persons/psychology/*rehabilitation, Female, Humans, Male, Middle Aged, Questionnaires, Rehabilitation Centers}, isbn = {1537-7385 (Electronic)}, author = {Jette, A. M. and Haley, S. M. and Ni, P. and Moed, R.} } @article {394, title = {Are we ready for computerized adaptive testing?}, journal = {Psychiatric Services}, volume = {59}, number = {4}, year = {2008}, note = {Unick, George JShumway, MarthaHargreaves, WilliamCommentUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 2008 Apr;59(4):369.}, month = {Apr}, pages = {369}, edition = {2008/04/02}, keywords = {*Attitude of Health Personnel, *Diagnosis, Computer-Assisted/instrumentation, Humans, Mental Disorders/*diagnosis, Software}, isbn = {1075-2730 (Print)1075-2730 (Linking)}, author = {Unick, G. J. and Shumway, M. and Hargreaves, W.} } @article {88, title = {Assessing self-care and social function using a computer adaptive testing version of the pediatric evaluation of disability inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {89}, number = {4}, year = {2008}, note = {Coster, Wendy JHaley, Stephen MNi, PengshengDumas, Helene MFragala-Pinkham, Maria AK02 HD45354-01A1/HD/NICHD NIH HHS/United StatesR41 HD052318-01A1/HD/NICHD NIH HHS/United StatesR43 HD42388-01/HD/NICHD NIH HHS/United StatesComparative StudyResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2008 Apr;89(4):622-9.}, month = {Apr}, pages = {622-629}, edition = {2008/04/01}, abstract = {OBJECTIVE: To examine score agreement, validity, precision, and response burden of a prototype computer adaptive testing (CAT) version of the self-care and social function scales of the Pediatric Evaluation of Disability Inventory compared with the full-length version of these scales. DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics; community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Children with disabilities (n=469) and 412 children with no disabilities (analytic sample); 38 children with disabilities and 35 children without disabilities (cross-validation sample). INTERVENTIONS: Not applicable. 
MAIN OUTCOME MEASURES: Summary scores from prototype CAT applications of each scale using 15-, 10-, and 5-item stopping rules; scores from the full-length self-care and social function scales; time (in seconds) to complete assessments and respondent ratings of burden. RESULTS: Scores from both computer simulations and field administration of the prototype CATs were highly consistent with scores from full-length administration (r range, .94-.99). Using computer simulation of retrospective data, discriminant validity, and sensitivity to change of the CATs closely approximated that of the full-length scales, especially when the 15- and 10-item stopping rules were applied. In the cross-validation study the time to administer both CATs was 4 minutes, compared with over 16 minutes to complete the full-length scales. CONCLUSIONS: Self-care and social function score estimates from CAT administration are highly comparable with those obtained from full-length scale administration, with small losses in validity and precision and substantial decreases in administration time.}, keywords = {*Disability Evaluation, *Social Adjustment, Activities of Daily Living, Adolescent, Age Factors, Child, Child, Preschool, Computer Simulation, Cross-Over Studies, Disabled Children/*rehabilitation, Female, Follow-Up Studies, Humans, Infant, Male, Outcome Assessment (Health Care), Reference Values, Reproducibility of Results, Retrospective Studies, Risk Factors, Self Care/*standards/trends, Sex Factors, Sickness Impact Profile}, isbn = {1532-821X (Electronic)0003-9993 (Linking)}, author = {Coster, W. J. and Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A.} } @conference {phankokkruad2008automated, title = {An Automated Decision System for Computer Adaptive Testing Using Genetic Algorithms}, booktitle = {Software Engineering, Artificial Intelligence, Networking, and Parallel/Distributed Computing, 2008. SNPD{\textquoteright}08. Ninth ACIS International Conference on}, year = {2008}, pages = {655{\textendash}660}, publisher = {IEEE}, organization = {IEEE}, abstract = {This paper proposes an approach to solve the triangle decision tree problem for computer adaptive testing (CAT) using genetic algorithms (GAs). In this approach, item response theory (IRT) parameters composed of discrimination, difficulty, and guess are firstly obtained and stored in an item bank. Then a fitness function, which is based on IRT parameters, of GAs for obtaining an optimal solution is set up. Finally, the GAs is applied to the parameters of the item bank so that an optimal decision tree is generated. Based on a six-level triangle-decision tree for examination items, the experimental results show that the optimal decision tree can be generated correctly when compared with the standard patterns.}, author = {Phankokkruad, M. and Woraratpanya, K.} } @article {241, title = {Binary items and beyond: a simulation of computer adaptive testing using the Rasch partial credit model}, journal = {Journal of Applied Measurement}, volume = {9}, number = {1}, year = {2008}, note = {Lange, RenseUnited StatesJournal of applied measurementJ Appl Meas. 2008;9(1):81-104.}, pages = {81-104}, edition = {2008/01/09}, abstract = {Past research on Computer Adaptive Testing (CAT) has focused almost exclusively on the use of binary items and minimizing the number of items to be administrated. To address this situation, extensive computer simulations were performed using partial credit items with two, three, four, and five response categories. 
Other variables manipulated include the number of available items, the number of respondents used to calibrate the items, and various manipulations of respondents{\textquoteright} true locations. Three item selection strategies were used, and the theoretically optimal Maximum Information method was compared to random item selection and Bayesian Maximum Falsification approaches. The Rasch partial credit model proved to be quite robust to various imperfections, and systematic distortions did occur mainly in the absence of sufficient numbers of items located near the trait or performance levels of interest. The findings further indicate that having small numbers of items is more problematic in practice than having small numbers of respondents to calibrate these items. Most importantly, increasing the number of response categories consistently improved CAT{\textquoteright}s efficiency as well as the general quality of the results. In fact, increasing the number of response categories proved to have a greater positive impact than did the choice of item selection method, as the Maximum Information approach performed only slightly better than the Maximum Falsification approach. Accordingly, issues related to the efficiency of item selection methods are far less important than is commonly suggested in the literature. However, being based on computer simulations only, the preceding presumes that actual respondents behave according to the Rasch model. CAT research could thus benefit from empirical studies aimed at determining whether, and if so, how, selection strategies impact performance.}, keywords = {*Data Interpretation, Statistical, *User-Computer Interface, Educational Measurement/*statistics \& numerical data, Humans, Illinois, Models, Statistical}, isbn = {1529-7713 (Print)1529-7713 (Linking)}, author = {Lange, R.} } @article {2127, title = {CAT-MD: Computerized adaptive testing on mobile devices}, journal = {International Journal of Web-Based Learning and Teaching Technologies}, volume = {3}, number = {1}, year = {2008}, pages = {13-20}, author = {Triantafillou, E. and Georgiadou, E. and Economides, A. A.} } @article {273, title = {Combining computer adaptive testing technology with cognitively diagnostic assessment}, journal = {Behavioral Research Methods }, volume = {40}, number = {3}, year = {2008}, note = {McGlohen, MeghanChang, Hua-HuaUnited StatesBehavior research methodsBehav Res Methods. 2008 Aug;40(3):808-21.}, month = {Aug}, pages = {808-21}, edition = {2008/08/14}, abstract = {A major advantage of computerized adaptive testing (CAT) is that it allows the test to home in on an examinee{\textquoteright}s ability level in an interactive manner. The aim of the new area of cognitive diagnosis is to provide information about specific content areas in which an examinee needs help. The goal of this study was to combine the benefit of specific feedback from cognitively diagnostic assessment with the advantages of CAT. In this study, three approaches to combining these were investigated: (1) item selection based on the traditional ability level estimate (theta), (2) item selection based on the attribute mastery feedback provided by cognitively diagnostic assessment (alpha), and (3) item selection based on both the traditional ability level estimate (theta) and the attribute mastery feedback provided by cognitively diagnostic assessment (alpha). 
The results from these three approaches were compared for theta estimation accuracy, attribute mastery estimation accuracy, and item exposure control. The theta- and alpha-based condition outperformed the alpha-based condition regarding theta estimation, attribute mastery pattern estimation, and item exposure control. Both the theta-based condition and the theta- and alpha-based condition performed similarly with regard to theta estimation, attribute mastery estimation, and item exposure control, but the theta- and alpha-based condition has an additional advantage in that it uses the shadow test method, which allows the administrator to incorporate additional constraints in the item selection process, such as content balancing, item type constraints, and so forth, and also to select items on the basis of both the current theta and alpha estimates, which can be built on top of existing 3PL testing programs.}, keywords = {*Cognition, *Computers, *Models, Statistical, *User-Computer Interface, Diagnosis, Computer-Assisted/*instrumentation, Humans}, isbn = {1554-351X (Print)}, author = {McGlohen, M. and Chang, Hua-Hua} } @article {2179, title = {Comparability of Computer-Based and Paper-and-Pencil Testing in K{\textendash}12 Reading Assessments}, journal = {Educational and Psychological Measurement}, volume = {68}, number = {1}, year = {2008}, pages = {5-24}, abstract = {

In recent years, computer-based testing (CBT) has grown in popularity, is increasingly being implemented across the United States, and will likely become the primary mode for delivering tests in the future. Although CBT offers many advantages over traditional paper-and-pencil testing, assessment experts, researchers, practitioners, and users have expressed concern about the comparability of scores between the two test administration modes. To help provide an answer to this issue, a meta-analysis was conducted to synthesize the administration mode effects of CBTs and paper-and-pencil tests on K{\textendash}12 student reading assessments. Findings indicate that the administration mode had no statistically significant effect on K{\textendash}12 student reading achievement scores. Four moderator variables{\textemdash}study design, sample size, computer delivery algorithm, and computer practice{\textemdash}made statistically significant contributions to predicting effect size. Three moderator variables{\textemdash}grade level, type of test, and computer delivery method{\textemdash}did not affect the differences in reading scores between test modes.

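For readers who want the arithmetic behind such a synthesis, a minimal fixed-effect sketch (the per-study values are invented and are not the meta-analysis data): each study contributes a standardized mean difference between administration modes, and the summary effect is the inverse-variance weighted mean.

    import numpy as np

    # Made-up per-study summaries: (standardized mean difference d, variance of d).
    studies = [(0.05, 0.010), (-0.02, 0.008), (0.01, 0.012)]
    d = np.array([s[0] for s in studies])
    v = np.array([s[1] for s in studies])

    # Fixed-effect weighted mean effect size and its standard error.
    w = 1.0 / v
    d_bar = np.sum(w * d) / np.sum(w)
    se = np.sqrt(1.0 / np.sum(w))
    print(d_bar, se)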
}, doi = {10.1177/0013164407305592}, url = {http://epm.sagepub.com/content/68/1/5.abstract}, author = {Shudong Wang and Hong Jiao and Young, Michael J. and Brooks, Thomas and Olson, John} } @article {2102, title = {Computer Adaptive-Attribute Testing: A New Approach to Cognitive Diagnostic Assessment}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, pages = {29-39}, abstract = {

The influence of interdisciplinary forces stemming from developments in cognitive science, mathematical statistics, educational psychology, and computing science is beginning to appear in educational and psychological assessment. Computer adaptive-attribute testing (CA-AT) is one example. The concepts and procedures in CA-AT can be found at the intersection between computer adaptive testing and cognitive diagnostic assessment. CA-AT allows us to fuse the administrative benefits of computer adaptive testing with the psychological benefits of cognitive diagnostic assessment to produce an innovative psychologically based adaptive testing approach. We describe the concepts behind CA-AT as well as illustrate how it can be used to promote formative, computer-based, classroom assessment.

}, keywords = {cognition and assessment, cognitive diagnostic assessment, computer adaptive testing}, doi = {10.1027/0044-3409.216.1.29}, author = {Gierl, M. J. and Zhou, J.} } @article {2181, title = {Computer-Based and Paper-and-Pencil Administration Mode Effects on a Statewide End-of-Course English Test}, journal = {Educational and Psychological Measurement}, volume = {68}, number = {4}, year = {2008}, pages = {554-570}, abstract = {

The current study compared student performance between paper-and-pencil testing (PPT) and computer-based testing (CBT) on a large-scale statewide end-of-course English examination. Analyses were conducted at both the item and test levels. The overall results suggest that scores obtained from PPT and CBT were comparable. However, at the content domain level, a rather large difference in the reading comprehension section suggests that the reading comprehension test may be more affected by the test administration mode. Results from the confirmatory factor analysis suggest that the administration mode did not alter the construct of the test.

}, doi = {10.1177/0013164407310132}, url = {http://epm.sagepub.com/content/68/4/554.abstract}, author = {Kim, Do-Hong and Huynh, Huynh} } @article {169, title = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: II. Participation outcomes}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {89}, number = {2}, year = {2008}, note = {Haley, Stephen MGandek, BarbaraSiebens, HilaryBlack-Schaffer, Randie MSinclair, Samuel JTao, WeiCoster, Wendy JNi, PengshengJette, Alan MK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesK02 HD45354-01/HD/NICHD NIH HHS/United StatesR01 HD043568/HD/NICHD NIH HHS/United StatesR01 HD043568-01/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2008 Feb;89(2):275-83.}, month = {Feb}, pages = {275-283}, edition = {2008/01/30}, abstract = {OBJECTIVES: To measure participation outcomes with a computerized adaptive test (CAT) and compare CAT and traditional fixed-length surveys in terms of score agreement, respondent burden, discriminant validity, and responsiveness. DESIGN: Longitudinal, prospective cohort study of patients interviewed approximately 2 weeks after discharge from inpatient rehabilitation and 3 months later. SETTING: Follow-up interviews conducted in patient{\textquoteright}s home setting. PARTICIPANTS: Adults (N=94) with diagnoses of neurologic, orthopedic, or medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Participation domains of mobility, domestic life, and community, social, \& civic life, measured using a CAT version of the Participation Measure for Postacute Care (PM-PAC-CAT) and a 53-item fixed-length survey (PM-PAC-53). RESULTS: The PM-PAC-CAT showed substantial agreement with PM-PAC-53 scores (intraclass correlation coefficient, model 3,1, .71-.81). On average, the PM-PAC-CAT was completed in 42\% of the time and with only 48\% of the items as compared with the PM-PAC-53. Both formats discriminated across functional severity groups. The PM-PAC-CAT had modest reductions in sensitivity and responsiveness to patient-reported change over a 3-month interval as compared with the PM-PAC-53. CONCLUSIONS: Although continued evaluation is warranted, accurate estimates of participation status and responsiveness to change for group-level analyses can be obtained from CAT administrations, with a sizeable reduction in respondent burden.}, keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {1532-821X (Electronic)0003-9993 (Linking)}, author = {Haley, S. M. and Gandek, B. and Siebens, H. and Black-Schaffer, R. M. and Sinclair, S. J. and Tao, W. and Coster, W. J. and Ni, P. and Jette, A. M.} } @article {186, title = {Computerized adaptive testing for patients with knee inpairments produced valid and responsive measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {61}, number = {4}, year = {2008}, pages = {1113-1124}, author = {Hart, D. L. and Wang, Y-C. and Stratford, P. W. and Mioduski, J. 
E.} } @article {231, title = {Computerized adaptive testing in back pain: Validation of the CAT-5D-QOL}, journal = {Spine}, volume = {33}, number = {12}, year = {2008}, note = {Kopec, Jacek ABadii, MaziarMcKenna, MarioLima, Viviane DSayre, Eric CDvorak, MarcelResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesUnited StatesSpineSpine (Phila Pa 1976). 2008 May 20;33(12):1384-90.}, month = {May 20}, pages = {1384-90}, edition = {2008/05/23}, abstract = {STUDY DESIGN: We have conducted an outcome instrument validation study. OBJECTIVE: Our objective was to develop a computerized adaptive test (CAT) to measure 5 domains of health-related quality of life (HRQL) and assess its feasibility, reliability, validity, and efficiency. SUMMARY OF BACKGROUND DATA: Kopec and colleagues have recently developed item response theory based item banks for 5 domains of HRQL relevant to back pain and suitable for CAT applications. The domains are Daily Activities (DAILY), Walking (WALK), Handling Objects (HAND), Pain or Discomfort (PAIN), and Feelings (FEEL). METHODS: An adaptive algorithm was implemented in a web-based questionnaire administration system. The questionnaire included CAT-5D-QOL (5 scales), Modified Oswestry Disability Index (MODI), Roland-Morris Disability Questionnaire (RMDQ), SF-36 Health Survey, and standard clinical and demographic information. Participants were outpatients treated for mechanical back pain at a referral center in Vancouver, Canada. RESULTS: A total of 215 patients completed the questionnaire and 84 completed a retest. On average, patients answered 5.2 items per CAT-5D-QOL scale. Reliability ranged from 0.83 (FEEL) to 0.92 (PAIN) and was 0.92 for the MODI, RMDQ, and Physical Component Summary (PCS-36). The ceiling effect was 0.5\% for PAIN compared with 2\% for MODI and 5\% for RMQ. The CAT-5D-QOL scales correlated as anticipated with other measures of HRQL and discriminated well according to the level of satisfaction with current symptoms, duration of the last episode, sciatica, and disability compensation. The average relative discrimination index was 0.87 for PAIN, 0.67 for DAILY and 0.62 for WALK, compared with 0.89 for MODI, 0.80 for RMDQ, and 0.59 for PCS-36. CONCLUSION: The CAT-5D-QOL is feasible, reliable, valid, and efficient in patients with back pain. This methodology can be recommended for use in back pain research and should improve outcome assessment, facilitate comparisons across studies, and reduce patient burden.}, keywords = {*Disability Evaluation, *Health Status Indicators, *Quality of Life, Adult, Aged, Algorithms, Back Pain/*diagnosis/psychology, British Columbia, Diagnosis, Computer-Assisted/*standards, Feasibility Studies, Female, Humans, Internet, Male, Middle Aged, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results}, isbn = {1528-1159 (Electronic)0362-2436 (Linking)}, author = {Kopec, J. A. and Badii, M. and McKenna, M. and Lima, V. D. and Sayre, E. C. and Dvorak, M.} } @article {2103, title = {Computerized Adaptive Testing of Personality Traits}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216}, year = {2008}, pages = {12-21}, abstract = {

A computerized adaptive testing (CAT) procedure was simulated with ordinal polytomous personality data collected using a
conventional paper-and-pencil testing format. An adapted Dutch version of the dominance scale of Gough and Heilbrun{\textquoteright}s Adjective
Check List (ACL) was used. This version contained Likert response scales with five categories. Item parameters were estimated using Samejima{\textquoteright}s graded response model from the responses of 1,925 subjects. The CAT procedure was simulated using the responses of 1,517 other subjects. The value of the required standard error in the stopping rule of the CAT was manipulated. The relationship between CAT latent trait estimates and estimates based on all dominance items was studied. Additionally, the pattern of relationships between the CAT latent trait estimates and the other ACL scales was compared to that between latent trait estimates based on the entire item pool and the other ACL scales. The CAT procedure resulted in latent trait estimates qualitatively equivalent to latent trait estimates based on all items, while a substantial reduction of the number of used items could be realized (at the stopping rule of 0.4 about 33\% of the 36 items was used).

}, keywords = {Adaptive Testing, computer-assisted testing, Item Response Theory, Likert scales, Personality Measures}, doi = {10.1027/0044-3409.216.1.12}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {73, title = {Controlling item exposure and test overlap on the fly in computerized adaptive testing}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {61}, number = {2}, year = {2008}, note = {Chen, Shu-YingLei, Pui-WaLiao, Wen-HanResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2008 Nov;61(Pt 2):471-92. Epub 2007 Jul 23.}, month = {Nov}, pages = {471-92}, edition = {2007/07/26}, abstract = {This paper proposes an on-line version of the Sympson and Hetter procedure with test overlap control (SHT) that can provide item exposure control at both the item and test levels on the fly without iterative simulations. The on-line procedure is similar to the SHT procedure in that exposure parameters are used for simultaneous control of item exposure rates and test overlap rate. The exposure parameters for the on-line procedure, however, are updated sequentially on the fly, rather than through iterative simulations conducted prior to operational computerized adaptive tests (CATs). Unlike the SHT procedure, the on-line version can control item exposure rate and test overlap rate without time-consuming iterative simulations even when item pools or examinee populations have been changed. Moreover, the on-line procedure was found to perform better than the SHT procedure in controlling item exposure and test overlap for examinees who take tests earlier. Compared with two other on-line alternatives, this proposed on-line method provided the best all-around test security control. Thus, it would be an efficient procedure for controlling item exposure and test overlap in CATs.}, keywords = {*Decision Making, Computer-Assisted, *Models, Psychological, Humans}, isbn = {0007-1102 (Print)0007-1102 (Linking)}, author = {Chen, S-Y. and Lei, P. W. and Liao, W. H.} } @conference {101, title = {Developing a progressive approach to using the GAIN in order to reduce the duration and cost of assessment with the GAIN short screener, Quick and computer adaptive testing}, booktitle = {Joint Meeting on Adolescent Treatment Effectiveness }, year = {2008}, note = {ProCite field[6]: Paper presented at the}, month = {2008}, address = {Washington D.C., USA}, author = {Dennis, M. L. and Funk, R. and Titus, J. and Riley, B. B. and Hosman, S. and Kinne, S.} } @article {307, title = {The D-optimality item selection criterion in the early stage of CAT: A study with the graded response model}, journal = {Journal of Educational and Behavioral Statistics}, volume = {33}, number = {1}, year = {2008}, pages = {88-110}, abstract = {During the early stage of computerized adaptive testing (CAT), item selection criteria based on Fisher{\textquoteright}s information often produce less stable latent trait estimates than the Kullback-Leibler global information criterion. Robustness against early stage instability has been reported for the D-optimality criterion in a polytomous CAT with the Nominal Response Model and is shown herein to be reproducible for the Graded Response Model. For comparative purposes, the A-optimality and the global information criteria are also applied. Their item selection is investigated as a function of test progression and item bank composition.
The results indicate how the selection of specific item parameters underlies the criteria performances evaluated via accuracy and precision of estimation. In addition, the criteria item exposure rates are compared, without the use of any exposure controlling measure. On the account of stability, precision, accuracy, numerical simplicity, and less evidently, item exposure rate, the D-optimality criterion can be recommended for CAT.}, keywords = {computerized adaptive testing, D optimality, item selection}, author = {Passos, V. L. and Berger, M. P. F. and Tan, F. E. S.} } @book {1672, title = {Effect of early misfit in computerized adaptive testing on the recovery of theta}, year = {2008}, note = {{PDF file, 1,004 KB}}, address = {Unpublished Ph.D. dissertation, University of Minnesota, Minneapolis MN.}, author = {Guyer, R. D.} } @article {5, title = {Efficiency and sensitivity of multidimensional computerized adaptive testing of pediatric physical functioning}, journal = {Disability \& Rehabilitation}, volume = {30}, number = {6}, year = {2008}, note = {Allen, Diane DNi, PengshengHaley, Stephen MK02 HD45354-01/HD/NICHD NIH HHS/United StatesNIDDR H133P0001/DD/NCBDD CDC HHS/United StatesResearch Support, N.I.H., ExtramuralEnglandDisability and rehabilitationDisabil Rehabil. 2008;30(6):479-84.}, pages = {479-84}, edition = {2008/02/26}, abstract = {PURPOSE: Computerized adaptive tests (CATs) have efficiency advantages over fixed-length tests of physical functioning but may lose sensitivity when administering extremely low numbers of items. Multidimensional CATs may efficiently improve sensitivity by capitalizing on correlations between functional domains. Using a series of empirical simulations, we assessed the efficiency and sensitivity of multidimensional CATs compared to a longer fixed-length test. METHOD: Parent responses to the Pediatric Evaluation of Disability Inventory before and after intervention for 239 children at a pediatric rehabilitation hospital provided the data for this retrospective study. Reliability, effect size, and standardized response mean were compared between full-length self-care and mobility subscales and simulated multidimensional CATs with stopping rules at 40, 30, 20, and 10 items. RESULTS: Reliability was lowest in the 10-item CAT condition for the self-care (r = 0.85) and mobility (r = 0.79) subscales; all other conditions had high reliabilities (r > 0.94). All multidimensional CAT conditions had equivalent levels of sensitivity compared to the full set condition for both domains. CONCLUSIONS: Multidimensional CATs efficiently retain the sensitivity of longer fixed-length measures even with 5 items per dimension (10-item CAT condition). Measuring physical functioning with multidimensional CATs could enhance sensitivity following intervention while minimizing response burden.}, keywords = {*Disability Evaluation, Child, Computers, Disabled Children/*classification/rehabilitation, Efficiency, Humans, Outcome Assessment (Health Care), Psychometrics, Reproducibility of Results, Retrospective Studies, Self Care, Sensitivity and Specificity}, isbn = {0963-8288 (Print)0963-8288 (Linking)}, author = {Allen, D. D. and Ni, P. and Haley, S. 
M.} } @article {20, title = {Functioning and validity of a computerized adaptive test to measure anxiety (A CAT)}, journal = {Depression and Anxiety}, volume = {25}, number = {12}, year = {2008}, pages = {E182-E194}, abstract = {Background: The aim of this study was to evaluate the Computerized Adaptive Test to measure anxiety (A-CAT), a patient-reported outcome questionnaire that uses computerized adaptive testing to measure anxiety. Methods: The A-CAT builds on an item bank of 50 items that has been built using conventional item analyses and item response theory analyses. The A-CAT was administered on Personal Digital Assistants to n=357 patients diagnosed and treated at the department of Psychosomatic Medicine and Psychotherapy, Charit{\'e} Berlin, Germany. For validation purposes, two subgroups of patients (n=110 and 125) answered the A-CAT along with established anxiety and depression questionnaires. Results: The A-CAT was fast to complete (on average in 2 min, 38 s) and a precise item response theory based CAT score (reliability>.9) could be estimated after 4{\textendash}41 items. On average, the CAT displayed 6 items (SD=4.2). Convergent validity of the A-CAT was supported by correlations to existing tools (Hospital Anxiety and Depression Scale-A, Beck Anxiety Inventory, Berliner Stimmungs-Fragebogen A/D, and State Trait Anxiety Inventory: r=.56{\textendash}.66); discriminant validity between diagnostic groups was higher for the A-CAT than for other anxiety measures. Conclusions: The German A-CAT is an efficient, reliable, and valid tool for assessing anxiety in patients suffering from anxiety disorders and other conditions with significant potential for initial assessment and long-term treatment monitoring. Future research directions are to explore content balancing of the item selection algorithm of the CAT, to norm the tool to a healthy sample, and to develop practical cutoff scores. Depression and Anxiety, 2008. {\textcopyright} 2008 Wiley-Liss, Inc.}, isbn = {1520-6394}, author = {Becker, J. and Fliege, H. and Kocalevent, R. D. and Bjorner, J. B. and Rose, M. and Walter, O. B. and Klapp, B. F.} } @article {594, title = {ICAT: An adaptive testing procedure for the identification of idiosyncratic knowledge patterns}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216(1)}, year = {2008}, pages = {40{\textendash}48}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {225, title = {ICAT: An adaptive testing procedure for the identification of idiosyncratic knowledge patterns}, journal = {Zeitschrift f{\"u}r Psychologie}, volume = {216}, number = {1}, year = {2008}, pages = {40-48}, abstract = {

Traditional adaptive tests provide an efficient method for estimating student achievement levels by adjusting the characteristics of the test questions to match the performance of each student. These traditional adaptive tests are not designed to identify idiosyncratic knowledge patterns. As students move through their education, they learn content in any number of different ways related to their learning style and cognitive development. This may result in a student having different achievement levels from one content area to another within a domain of content. This study investigates whether such idiosyncratic knowledge patterns exist. It discusses the differences between idiosyncratic knowledge patterns and multidimensionality. Finally, it proposes an adaptive testing procedure that can be used to identify a student{\textquoteright}s areas of strength and weakness more efficiently than current adaptive testing approaches. The findings of the study indicate that a fairly large number of students may have test results that are influenced by their idiosyncratic knowledge patterns. The findings suggest that these patterns persist across time for a large number of students, and that the differences in student performance between content areas within a subject domain are large enough to allow them to be useful in instruction. Given the existence of idiosyncratic patterns of knowledge, the proposed testing procedure may enable us to provide more useful information to teachers. It should also allow us to differentiate between idiosyncratic patterns of knowledge and important multidimensionality in the testing data.

}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {2034, title = {Impact of altering randomization intervals on precision of measurement and item exposure}, journal = {Journal of Applied Measurement}, volume = {9}, year = {2008}, pages = {160-167}, abstract = {

This paper reports on the use of simulation when a randomization procedure is used to control item exposure in a computerized adaptive test for certification. We present a method to determine the optimum width of the interval from which items are selected and we report on the impact of relaxing the interval width on measurement precision and item exposure. Results indicate that, if the item bank is well targeted, it may be possible to widen the randomization interval and thus reduce item exposure, without seriously impacting the error of measure for test takers whose ability estimate is near the pass point.

}, author = {Muckle, T. J. and Bergstrom, B. A. and Becker, K. and Stahl, J. A.} } @article {2234, title = {Implementing Sympson-Hetter Item-Exposure Control in a Shadow-Test Approach to Constrained Adaptive Testing}, journal = {International Journal of Testing}, volume = {8}, number = {3}, year = {2008}, pages = {272-289}, doi = {10.1080/15305050802262233}, url = {http://www.tandfonline.com/doi/abs/10.1080/15305050802262233}, author = {Veldkamp, Bernard P. and van der Linden, Wim J.} } @article {454, title = {Incorporating randomness in the Fisher information for improving item-exposure control in CATs}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {61}, year = {2008}, pages = {493-513}, author = {Barrada, J and Olea, J. and Ponsoda, V. and Abad, F. J.} } @article {117, title = {An initial application of computerized adaptive testing (CAT) for measuring disability in patients with low back pain}, journal = {BMC Musculoskelet Disorders}, volume = {9}, number = {1}, year = {2008}, note = {Journal articleBMC musculoskeletal disordersBMC Musculoskelet Disord. 2008 Dec 18;9(1):166.}, month = {Dec 18}, pages = {166}, edition = {2008/12/20}, abstract = {ABSTRACT: BACKGROUND: Recent approaches to outcome measurement involving Computerized Adaptive Testing (CAT) offer an approach for measuring disability in low back pain (LBP) in a way that can reduce the burden upon patient and professional. The aim of this study was to explore the potential of CAT in LBP for measuring disability as defined in the International Classification of Functioning, Disability and Health (ICF) which includes impairments, activity limitation, and participation restriction. METHODS: 266 patients with low back pain answered questions from a range of widely used questionnaires. An exploratory factor analysis (EFA) was used to identify disability dimensions which were then subjected to Rasch analysis. Reliability was tested by internal consistency and person separation index (PSI). Discriminant validity of disability levels were evaluated by Spearman correlation coefficient (r), intraclass correlation coefficient [ICC(2,1)] and the Bland-Altman approach. A CAT was developed for each dimension, and the results checked against simulated and real applications from a further 133 patients. RESULTS: Factor analytic techniques identified two dimensions named "body functions" and "activity-participation". After deletion of some items for failure to fit the Rasch model, the remaining items were mostly free of Differential Item Functioning (DIF) for age and gender. Reliability exceeded 0.90 for both dimensions. The disability levels generated using all items and those obtained from the real CAT application were highly correlated (i.e. >0.97 for both dimensions). On average, 19 and 14 items were needed to estimate the precise disability levels using the initial CAT for the first and second dimension. However, a marginal increase in the standard error of the estimate across successive iterations substantially reduced the number of items required to make an estimate. CONCLUSIONS: Using a combination approach of EFA and Rasch analysis this study has shown that it is possible to calibrate items onto a single metric in a way that can be used to provide the basis of a CAT application. 
Thus there is an opportunity to obtain a wide variety of information to evaluate the biopsychosocial model in its more complex forms, without necessarily increasing the burden of information collection for patients.}, isbn = {1471-2474 (Electronic)}, author = {Elhan, A. H. and Oztuna, D. and Kutlay, S. and Kucukdeveci, A. A. and Tennant, A.} } @article {2120, title = {Investigating item exposure control on the fly in computerized adaptive testing}, journal = {Psychological Testing}, volume = {55}, year = {2008}, pages = {1-32}, author = {Wu, M.-L. and Chen, S-Y.} } @article {584, title = {Item exposure control in a-stratified computerized adaptive testing}, journal = {Psychological Testing}, volume = {55}, year = {2008}, pages = {793-811}, author = {Jhu, Y.-J., and Chen, S-Y.} } @article {84, title = {Letting the CAT out of the bag: Comparing computer adaptive tests and an 11-item short form of the Roland-Morris Disability Questionnaire}, journal = {Spine}, volume = {33}, number = {12}, year = {2008}, note = {Cook, Karon FChoi, Seung WCrane, Paul KDeyo, Richard AJohnson, Kurt LAmtmann, Dagmar5 P60-AR48093/AR/United States NIAMS5U01AR052171-03/AR/United States NIAMSComparative StudyResearch Support, N.I.H., ExtramuralUnited StatesSpineSpine. 2008 May 20;33(12):1378-83.}, month = {May 20}, pages = {1378-83}, edition = {2008/05/23}, abstract = {STUDY DESIGN: A post hoc simulation of a computer adaptive administration of the items of a modified version of the Roland-Morris Disability Questionnaire. OBJECTIVE: To evaluate the effectiveness of adaptive administration of back pain-related disability items compared with a fixed 11-item short form. SUMMARY OF BACKGROUND DATA: Short form versions of the Roland-Morris Disability Questionnaire have been developed. An alternative to paper-and-pencil short forms is to administer items adaptively so that items are presented based on a person{\textquoteright}s responses to previous items. Theoretically, this allows precise estimation of back pain disability with administration of only a few items. MATERIALS AND METHODS: Data were gathered from 2 previously conducted studies of persons with back pain. An item response theory model was used to calibrate scores based on all items, items of a paper-and-pencil short form, and several computer adaptive tests (CATs). RESULTS: Correlations between each CAT condition and scores based on a 23-item version of the Roland-Morris Disability Questionnaire ranged from 0.93 to 0.98. Compared with an 11-item short form, an 11-item CAT produced scores that were significantly more highly correlated with scores based on the 23-item scale. CATs with even fewer items also produced scores that were highly correlated with scores based on all items. For example, scores from a 5-item CAT had a correlation of 0.93 with full scale scores. Seven- and 9-item CATs correlated at 0.95 and 0.97, respectively. A CAT with a standard-error-based stopping rule produced scores that correlated at 0.95 with full scale scores. CONCLUSION: A CAT-based back pain-related disability measure may be a valuable tool for use in clinical and research contexts. 
Use of CAT for other common measures in back pain research, such as other functional scales or measures of psychological distress, may offer similar advantages.}, keywords = {*Disability Evaluation, *Health Status Indicators, Adult, Aged, Aged, 80 and over, Back Pain/*diagnosis/psychology, Calibration, Computer Simulation, Diagnosis, Computer-Assisted/*standards, Humans, Middle Aged, Models, Psychological, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results}, isbn = {1528-1159 (Electronic)}, author = {Cook, K. F. and Choi, S. W. and Crane, P. K. and Deyo, R. A. and Johnson, K. L. and Amtmann, D.} } @article {2200, title = {Local Dependence in an Operational CAT: Diagnosis and Implications}, journal = {Journal of Educational Measurement}, volume = {45}, number = {3}, year = {2008}, pages = {201{\textendash}223}, abstract = {

The accuracy of CAT scores can be negatively affected by local dependence if the CAT utilizes parameters that are misspecified due to the presence of local dependence and/or fails to control for local dependence in responses during the administration stage. This article evaluates the existence and effect of local dependence in a test of Mathematics Knowledge. Diagnostic tools were first used to evaluate the existence of local dependence in items that were calibrated under a 3PL model. A simulation study was then used to evaluate the effect of local dependence on the precision of examinee CAT scores when the 3PL model was used for selection and scoring. The diagnostic evaluation showed strong evidence for local dependence. The simulation suggested that local dependence in parameters had a minimal effect on CAT score precision, while local dependence in responses had a substantial effect on score precision, depending on the degree of local dependence present.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2008.00061.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2008.00061.x}, author = {Pommerich, Mary and Segall, Daniel O.} } @article {287, title = {Measuring physical functioning in children with spinal impairments with computerized adaptive testing}, journal = {Journal of Pediatric Orthopedics}, volume = {28}, number = {3}, year = {2008}, note = {Mulcahey, M JHaley, Stephen MDuffy, TheresaPengsheng, NiBetz, Randal RK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesUnited StatesJournal of pediatric orthopedicsJ Pediatr Orthop. 2008 Apr-May;28(3):330-5.}, month = {Apr-May}, pages = {330-5}, edition = {2008/03/26}, abstract = {BACKGROUND: The purpose of this study was to assess the utility of measuring current physical functioning status of children with scoliosis and kyphosis by applying computerized adaptive testing (CAT) methods. Computerized adaptive testing uses a computer interface to administer the most optimal items based on previous responses, reducing the number of items needed to obtain a scoring estimate. METHODS: This was a prospective study of 77 subjects (0.6-19.8 years) who were seen by a spine surgeon during a routine clinic visit for progress spine deformity. Using a multidimensional version of the Pediatric Evaluation of Disability Inventory CAT program (PEDI-MCAT), we evaluated content range, accuracy and efficiency, known-group validity, concurrent validity with the Pediatric Outcomes Data Collection Instrument, and test-retest reliability in a subsample (n = 16) within a 2-week interval. RESULTS: We found the PEDI-MCAT to have sufficient item coverage in both self-care and mobility content for this sample, although most patients tended to score at the higher ends of both scales. Both the accuracy of PEDI-MCAT scores as compared with a fixed format of the PEDI (r = 0.98 for both mobility and self-care) and test-retest reliability were very high [self-care: intraclass correlation (3,1) = 0.98, mobility: intraclass correlation (3,1) = 0.99]. The PEDI-MCAT took an average of 2.9 minutes for the parents to complete. The PEDI-MCAT detected expected differences between patient groups, and scores on the PEDI-MCAT correlated in expected directions with scores from the Pediatric Outcomes Data Collection Instrument domains. CONCLUSIONS: Use of the PEDI-MCAT to assess the physical functioning status, as perceived by parents of children with complex spinal impairments, seems to be feasible and achieves accurate and efficient estimates of self-care and mobility function. Additional item development will be needed at the higher functioning end of the scale to avoid ceiling effects for older children. LEVEL OF EVIDENCE: This is a level II prospective study designed to establish the utility of computer adaptive testing as an evaluation method in a busy pediatric spine practice.}, keywords = {*Disability Evaluation, Adolescent, Child, Child, Preschool, Computer Simulation, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Kyphosis/*diagnosis/rehabilitation, Male, Prospective Studies, Reproducibility of Results, Scoliosis/*diagnosis/rehabilitation}, isbn = {0271-6798 (Print)0271-6798 (Linking)}, author = {Mulcahey, M. J. and Haley, S. M. and Duffy, T. and Pengsheng, N. and Betz, R. 
R.} } @article {19, title = {Modern sequential analysis and its application to computerized adaptive testing}, journal = {Psychometrika}, volume = {73}, number = {3}, year = {2008}, pages = {473-486}, abstract = {After a brief review of recent advances in sequential analysis involving sequential generalized likelihood ratio tests, we discuss their use in psychometric testing and extend the asymptotic optimality theory of these sequential tests to the case of sequentially generated experiments, of particular interest in computerized adaptive testing. We then show how these methods can be used to design adaptive mastery tests, which are asymptotically optimal and are also shown to provide substantial improvements over currently used sequential and fixed length tests.}, author = {Bartroff, J. and Finkelman, M. and Lai, T. L.} } @article {2151, title = {A Monte Carlo Approach for Adaptive Testing With Content Constraints}, journal = {Applied Psychological Measurement}, volume = {32}, number = {6}, year = {2008}, pages = {431-446}, abstract = {

This article presents a new algorithm for computerized adaptive testing (CAT) when content constraints are present. The algorithm is based on shadow CAT methodology to meet content constraints but applies Monte Carlo methods and provides the following advantages over shadow CAT: (a) lower maximum item exposure rates, (b) higher utilization of the item pool, and (c) more robust ability estimates. Computer simulations with Law School Admission Test items demonstrated that the new algorithm (a) produces similar ability estimates as shadow CAT but with half the maximum item exposure rate and 100\% pool utilization and (b) produces more robust estimates when a high- (or low-) ability examinee performs poorly (or well) at the beginning of the test.

}, doi = {10.1177/0146621607309081}, url = {http://apm.sagepub.com/content/32/6/431.abstract}, author = {Belov, Dmitry I. and Armstrong, Ronald D. and Weissman, Alexander} } @article {471, title = {A monte carlo approach for adaptive testing with content constraints}, journal = {Applied Psychological Measurement}, volume = {32}, year = {2008}, pages = {431-446}, doi = {10.1177/0146621607309081}, author = {Belov, D. I. and Armstrong, R. D. and Weissman, A.} } @article {470, title = {A Monte Carlo approach to the design, assembly, and evaluation of multistage adaptive tests}, journal = {Applied Psychological Measurement}, volume = {32}, year = {2008}, pages = {119{\textendash}137}, author = {Belov, D.I., Armstrong, R.D.} } @article {2150, title = {A Monte Carlo Approach to the Design, Assembly, and Evaluation of Multistage Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {32}, number = {2}, year = {2008}, pages = {119-137}, abstract = {

This article presents an application of Monte Carlo methods for developing and assembling multistage adaptive tests (MSTs). A major advantage of the Monte Carlo assembly over other approaches (e.g., integer programming or enumerative heuristics) is that it provides a uniform sampling from all MSTs (or MST paths) available from a given item pool. The uniform sampling allows a statistically valid analysis for MST design and evaluation. Given an item pool, MST model, and content constraints for test assembly, three problems are addressed in this study. They are (a) the construction of item response theory (IRT) targets for each MST path, (b) the assembly of an MST such that each path satisfies content constraints and IRT constraints, and (c) an analysis of the pool and constraints to increase the number of nonoverlapping MSTs that can be assembled from the pool. The primary intent is to produce reliable measurements and enhance pool utilization.

}, doi = {10.1177/0146621606297308}, url = {http://apm.sagepub.com/content/32/2/119.abstract}, author = {Belov, Dmitry I. and Armstrong, Ronald D.} } @article {293, title = {The NAPLEX: evolution, purpose, scope, and educational implications}, journal = {American Journal of Pharmaceutical Education}, volume = {72}, number = {2}, year = {2008}, note = {Newton, David WBoyle, MariaCatizone, Carmen AHistorical ArticleUnited StatesAmerican journal of pharmaceutical educationAm J Pharm Educ. 2008 Apr 15;72(2):33.}, month = {Apr 15}, pages = {33}, edition = {2008/05/17}, abstract = {Since 2004, passing the North American Pharmacist Licensure Examination (NAPLEX) has been a requirement for earning initial pharmacy licensure in all 50 United States. The creation and evolution from 1952-2005 of the particular pharmacy competency testing areas and quantities of questions are described for the former paper-and-pencil National Association of Boards of Pharmacy Licensure Examination (NABPLEX) and the current candidate-specific computer adaptive NAPLEX pharmacy licensure examinations. A 40\% increase in the weighting of NAPLEX Blueprint Area 2 in May 2005, compared to that in the preceding 1997-2005 Blueprint, has implications for candidates{\textquoteright} NAPLEX performance and associated curricular content and instruction. New pharmacy graduates{\textquoteright} scores on the NAPLEX are neither intended nor validated to serve as a criterion for assessing or judging the quality or effectiveness of pharmacy curricula and instruction. The newest cycle of NAPLEX Blueprint revision, a continual process to ensure representation of nationwide contemporary practice, began in early 2008. It may take up to 2 years, including surveying several thousand national pharmacists, to complete.}, keywords = {*Educational Measurement, Education, Pharmacy/*standards, History, 20th Century, History, 21st Century, Humans, Licensure, Pharmacy/history/*legislation \& jurisprudence, North America, Pharmacists/*legislation \& jurisprudence, Software}, isbn = {1553-6467 (Electronic)0002-9459 (Linking)}, author = {Newton, D. W. and Boyle, M. and Catizone, C. A.} } @article {71, title = {Predicting item exposure parameters in computerized adaptive testing}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {61}, number = {1}, year = {2008}, note = {Chen, Shu-YingDoong, Shing-HwangResearch Support, Non-U.S. Gov{\textquoteright}tEnglandThe British journal of mathematical and statistical psychologyBr J Math Stat Psychol. 2008 May;61(Pt 1):75-91.}, month = {May}, pages = {75-91}, edition = {2008/05/17}, abstract = {The purpose of this study is to find a formula that describes the relationship between item exposure parameters and item parameters in computerized adaptive tests by using genetic programming (GP) - a biologically inspired artificial intelligence technique. Based on the formula, item exposure parameters for new parallel item pools can be predicted without conducting additional iterative simulations. Results show that an interesting formula between item exposure parameters and item parameters in a pool can be found by using GP. The item exposure parameters predicted based on the found formula were close to those observed from the Sympson and Hetter (1985) procedure and performed well in controlling item exposure rates. Similar results were observed for the Stocking and Lewis (1998) multinomial model for item selection and the Sympson and Hetter procedure with content balancing. 
The proposed GP approach has provided a knowledge-based solution for finding item exposure parameters.}, keywords = {*Algorithms, *Artificial Intelligence, Aptitude Tests/*statistics \& numerical data, Diagnosis, Computer-Assisted/*statistics \& numerical data, Humans, Models, Statistical, Psychometrics/statistics \& numerical data, Reproducibility of Results, Software}, isbn = {0007-1102 (Print)0007-1102 (Linking)}, author = {Chen, S-Y. and Doong, S. H.} } @booklet {207, title = {Preparing the implementation of computerized adaptive testing for high-stakes examinations}, journal = {Journal of Educational Evaluation for Health Professions}, volume = {5}, year = {2008}, note = {Huh, SunEditorialKorea (South)Journal of educational evaluation for health professionsJ Educ Eval Health Prof. 2008;5:1. Epub 2008 Dec 22.}, pages = {1}, edition = {2009/02/19}, isbn = {1975-5937 (Electronic)}, author = {Huh, S.} } @article {17, title = {Rotating item banks versus restriction of maximum exposure rates in computerized adaptive testing}, journal = {Spanish Journal of Psychology}, volume = {11}, number = {2}, year = {2008}, note = {Barrada, Juan RamonOlea, JulioAbad, Francisco JoseResearch Support, Non-U.S. Gov{\textquoteright}tSpainThe Spanish journal of psychologySpan J Psychol. 2008 Nov;11(2):618-25.}, pages = {618-625}, edition = {2008/11/08}, abstract = {

If examinees were to know, beforehand, part of the content of a computerized adaptive test, their estimated trait levels would then have a marked positive bias. One of the strategies to avoid this consists of dividing a large item bank into several sub-banks and rotating the sub-bank employed (Ariel, Veldkamp \& van der Linden, 2004). This strategy permits substantial improvements in exposure control at little cost to measurement accuracy. However, we do not know whether this option provides better results than using the master bank with greater restriction in the maximum exposure rates (Sympson \& Hetter, 1985). In order to investigate this issue, we worked with several simulated banks of 2100 items, comparing them, for RMSE and overlap rate, with the same banks divided in two, three... up to seven sub-banks. By means of extensive manipulation of the maximum exposure rate in each bank, we found that the option of rotating banks slightly outperformed the option of restricting maximum exposure rate of the master bank by means of the Sympson-Hetter method.

}, keywords = {*Character, *Databases, *Software Design, Aptitude Tests/*statistics \& numerical data, Bias (Epidemiology), Computing Methodologies, Diagnosis, Computer-Assisted/*statistics \& numerical data, Educational Measurement/*statistics \& numerical data, Humans, Mathematical Computing, Psychometrics/statistics \& numerical data}, isbn = {1138-7416}, author = {Barrada, J and Olea, J. and Abad, F. J.} } @article {2152, title = {Severity of Organized Item Theft in Computerized Adaptive Testing: A Simulation Study}, journal = {Applied Psychological Measurement}, volume = {32}, number = {7}, year = {2008}, pages = {543-558}, abstract = {

Criteria had been proposed for assessing the severity of possible test security violations for computerized tests with high-stakes outcomes. However, these criteria resulted from theoretical derivations that assumed uniformly randomized item selection. This study investigated potential damage caused by organized item theft in computerized adaptive testing (CAT) for two realistic item selection methods, maximum item information and a-stratified with content blocking, using the randomized method as a baseline for comparison. Damage caused by organized item theft was evaluated by the number of compromised items each examinee could encounter and the impact of the compromised items on examinees{\textquoteright} ability estimates. Severity of test security violation was assessed under self-organized and organized item theft simulation scenarios. Results indicated that though item theft could cause severe damage to CAT with either item selection method, the maximum item information method was more vulnerable to the organized item theft simulation than was the a-stratified method.

}, doi = {10.1177/0146621607311336}, url = {http://apm.sagepub.com/content/32/7/543.abstract}, author = {Yi, Qing and Zhang, Jinming and Chang, Hua-Hua} } @article {400, title = {Some new developments in adaptive testing technology}, journal = {Zeitschrift f{\"u}r Psychologie}, volume = {216}, number = {1}, year = {2008}, pages = {3-11}, abstract = {

In an ironic twist of history, modern psychological testing has returned to an adaptive format quite common when testing was not yet standardized. Important stimuli to the renewed interest in adaptive testing have been the development of item-response theory in psychometrics, which models the responses on test items using separate parameters for the items and test takers, and the use of computers in test administration, which enables us to estimate the parameter for a test taker and select the items in real time. This article reviews a selection from the latest developments in the technology of adaptive testing, such as constrained adaptive item selection, adaptive testing using rule-based item generation, multidimensional adaptive testing, adaptive use of test batteries, and the use of response times in adaptive testing.

}, keywords = {computerized adaptive testing}, author = {van der Linden, W. J.} } @article {95, title = {Strategies for controlling item exposure in computerized adaptive testing with the partial credit model}, journal = {Journal of Applied Measurement}, volume = {9}, number = {1}, year = {2008}, note = {Davis, Laurie LaughlinDodd, Barbara GUnited StatesJournal of applied measurementJ Appl Meas. 2008;9(1):1-17.}, pages = {1-17}, edition = {2008/01/09}, abstract = {Exposure control research with polytomous item pools has determined that randomization procedures can be very effective for controlling test security in computerized adaptive testing (CAT). The current study investigated the performance of four procedures for controlling item exposure in a CAT under the partial credit model. In addition to a no exposure control baseline condition, the Kingsbury-Zara, modified-within-.10-logits, Sympson-Hetter, and conditional Sympson-Hetter procedures were implemented to control exposure rates. The Kingsbury-Zara and the modified-within-.10-logits procedures were implemented with 3 and 6 item candidate conditions. The results show that the Kingsbury-Zara and modified-within-.10-logits procedures with 6 item candidates performed as well as the conditional Sympson-Hetter in terms of exposure rates, overlap rates, and pool utilization. These two procedures are strongly recommended for use with partial credit CATs due to their simplicity and strength of their results.}, keywords = {*Algorithms, *Computers, *Educational Measurement/statistics \& numerical data, Humans, Questionnaires/*standards, United States}, isbn = {1529-7713 (Print)1529-7713 (Linking)}, author = {Davis, L. L. and Dodd, B. G.} } @article {2180, title = {A Strategy for Controlling Item Exposure in Multidimensional Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {68}, number = {2}, year = {2008}, pages = {215-232}, abstract = {

Although computerized adaptive tests have enjoyed tremendous growth, solutions for important problems remain unavailable. One problem is the control of item exposure rate. Because adaptive algorithms are designed to select optimal items, they choose items with high discriminating power. Thus, these items are selected more often than others, leading to both overexposure and underutilization of some parts of the item pool. Overused items are often compromised, creating a security problem that could threaten the validity of a test. Building on a previously proposed stratification scheme to control the exposure rate for one-dimensional tests, the authors extend their method to multidimensional tests. A strategy is proposed based on stratification in accordance with a functional of the vector of the discrimination parameter, which can be implemented with minimal computational overhead. Both theoretical and empirical validation studies are provided. Empirical results indicate significant improvement over the commonly used method of controlling exposure rate that requires only a reasonable sacrifice in efficiency.

}, doi = {10.1177/0013164407307007}, url = {http://epm.sagepub.com/content/68/2/215.abstract}, author = {Lee, Yi-Hsuan and Ip, Edward H. and Fuh, Cheng-Der} } @article {2109, title = {To Weight Or Not To Weight? Balancing Influence Of Initial Items In Adaptive Testing}, journal = {Psychometrika}, volume = {73}, year = {2008}, pages = {441-450}, abstract = {

It has been widely reported that in computerized adaptive testing some examinees may get much lower scores than they would normally if an alternative paper-and-pencil version were given. The main purpose of this investigation is to quantitatively reveal the cause for the underestimation phenomenon. The logistic models, including the 1PL, 2PL, and 3PL models, are used to demonstrate our assertions. Our analytical derivation shows that, under the maximum information item selection strategy, if an examinee failed a few items at the beginning of the test, easy but more discriminating items are likely to be administered. Such items are ineffective to move the estimate close to the true theta, unless the test is sufficiently long or a variable-length test is used. Our results also indicate that a certain weighting mechanism is necessary to make the algorithm rely less on the items administered at the beginning of the test.

}, doi = {10.1007/S11336-007-9047-7}, author = {Chang, H.-H. and Ying, Z.} } @article {783, title = {Transitioning from fixed-length questionnaires to computer-adaptive versions}, journal = {Zeitschrift f{\"u}r Psychologie / Journal of Psychology}, volume = {216(1)}, year = {2008}, pages = {22{\textendash}28}, author = {Walter, O. B. and Holling, H.} } @article {152, title = {Using computerized adaptive testing to reduce the burden of mental health assessment}, journal = {Psychiatric Services}, volume = {59}, number = {4}, year = {2008}, note = {Gibbons, Robert DWeiss, David JKupfer, David JFrank, EllenFagiolini, AndreaGrochocinski, Victoria JBhaumik, Dulal KStover, AngelaBock, R DarrellImmekus, Jason CR01-MH-30915/MH/United States NIMHR01-MH-66302/MH/United States NIMHResearch Support, N.I.H., ExtramuralUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 2008 Apr;59(4):361-8.}, month = {Apr}, pages = {361-8}, edition = {2008/04/02}, abstract = {OBJECTIVE: This study investigated the combination of item response theory and computerized adaptive testing (CAT) for psychiatric measurement as a means of reducing the burden of research and clinical assessments. METHODS: Data were from 800 participants in outpatient treatment for a mood or anxiety disorder; they completed 616 items of the 626-item Mood and Anxiety Spectrum Scales (MASS) at two times. The first administration was used to design and evaluate a CAT version of the MASS by using post hoc simulation. The second confirmed the functioning of CAT in live testing. RESULTS: Tests of competing models based on item response theory supported the scale{\textquoteright}s bifactor structure, consisting of a primary dimension and four group factors (mood, panic-agoraphobia, obsessive-compulsive, and social phobia). Both simulated and live CAT showed a 95\% average reduction (585 items) in items administered (24 and 30 items, respectively) compared with administration of the full MASS. The correlation between scores on the full MASS and the CAT version was .93. For the mood disorder subscale, differences in scores between two groups of depressed patients--one with bipolar disorder and one without--on the full scale and on the CAT showed effect sizes of .63 (p<.003) and 1.19 (p<.001) standard deviation units, respectively, indicating better discriminant validity for CAT. CONCLUSIONS: Instead of using small fixed-length tests, clinicians can create item banks with a large item pool, and a small set of the items most relevant for a given individual can be administered with no loss of information, yielding a dramatic reduction in administration time and patient and clinician burden.}, keywords = {*Diagnosis, Computer-Assisted, *Questionnaires, Adolescent, Adult, Aged, Agoraphobia/diagnosis, Anxiety Disorders/diagnosis, Bipolar Disorder/diagnosis, Female, Humans, Male, Mental Disorders/*diagnosis, Middle Aged, Mood Disorders/diagnosis, Obsessive-Compulsive Disorder/diagnosis, Panic Disorder/diagnosis, Phobic Disorders/diagnosis, Reproducibility of Results, Time Factors}, isbn = {1075-2730 (Print)}, author = {Gibbons, R. D. and Weiss, D. J. and Kupfer, D. J. and Frank, E. and Fagiolini, A. and Grochocinski, V. J. and Bhaumik, D. K. and Stover, A. and Bock, R. D. and Immekus, J.
C.} } @article {132, title = {Using item banks to construct measures of patient reported outcomes in clinical trials: investigator perceptions}, journal = {Clinical Trials}, volume = {5}, number = {6}, year = {2008}, note = {Flynn, Kathryn EDombeck, Carrie BDeWitt, Esi MorganSchulman, Kevin AWeinfurt, Kevin P5U01AR052186/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralEnglandClinical trials (London, England)Clin Trials. 2008;5(6):575-86.}, pages = {575-86}, edition = {2008/11/26}, abstract = {BACKGROUND: Item response theory (IRT) promises more sensitive and efficient measurement of patient-reported outcomes (PROs) than traditional approaches; however, the selection and use of PRO measures from IRT-based item banks differ from current methods of using PRO measures. PURPOSE: To anticipate barriers to the adoption of IRT item banks into clinical trials. METHODS: We conducted semistructured telephone or in-person interviews with 42 clinical researchers who published results from clinical trials in the Journal of the American Medical Association, the New England Journal of Medicine, or other leading clinical journals from July 2005 through May 2006. Interviews included a brief tutorial on IRT item banks. RESULTS: After the tutorial, 39 of 42 participants understood the novel products available from an IRT item bank, namely customized short forms and computerized adaptive testing. Most participants (38/42) thought that item banks could be useful in their clinical trials, but they mentioned several potential barriers to adoption, including economic and logistical constraints, concerns about whether item banks are better than current PRO measures, concerns about how to convince study personnel or statisticians to use item banks, concerns about FDA or sponsor acceptance, and the lack of availability of item banks validated in specific disease populations. LIMITATIONS: Selection bias might have led to more positive responses to the concept of item banks in clinical trials. CONCLUSIONS: Clinical investigators are open to a new method of PRO measurement offered in IRT item banks, but bank developers must address investigator and stakeholder concerns before widespread adoption can be expected.}, isbn = {1740-7745 (Print)}, author = {Flynn, K. E. and Dombeck, C. B. and DeWitt, E. M. and Schulman, K. A. and Weinfurt, K. P.} } @article {743, title = {Using response times for item selection in adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {33}, year = {2008}, pages = {5{\textendash}20}, author = {van der Linden, W. J.} } @article {129, title = {On using stochastic curtailment to shorten the SPRT in sequential mastery testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {33}, number = {4}, year = {2008}, pages = {442}, author = {Finkelman, M. D.} } @article {420, title = {Utilizing Rasch measurement models to develop a computer adaptive self-report of walking, climbing, and running}, journal = {Disability and Rehabilitation}, volume = {30}, number = {6}, year = {2008}, pages = {458-467}, abstract = {Purpose. The purpose of this paper is to show how the Rasch model can be used to develop a computer adaptive self-report of walking, climbing, and running. Method. Our instrument development work on the walking/climbing/running construct of the ICF Activity Measure was used to show how to develop a computer adaptive test (CAT).
Fit of the items to the Rasch model and validation of the item difficulty hierarchy was accomplished using Winsteps software. Standard error was used as a stopping rule for the CAT. Finally, person abilities were connected to item difficulties using Rasch analysis {\textquoteleft}maps{\textquoteright}. Results. All but the walking one mile item fit the Rasch measurement model. A CAT was developed which selectively presented items based on the last calibrated person ability measure and was designed to stop when standard error decreased to a pre-set criterion. Finally, person ability measures were connected to the ability to perform specific walking/climbing/running activities using Rasch maps. Conclusions. Rasch measurement models can be useful in developing CAT measures for rehabilitation and disability. In addition to CATs reducing respondent burden, the connection of person measures to item difficulties may be important for the clinical interpretation of measures.}, url = {http://informahealthcare.com/doi/abs/10.1080/09638280701617317}, author = {Velozo, C. A. and Wang, Y. and Lehman, L. A. and Wang, J. H.} } @article {128, title = {The Wald{\textendash}Wolfowitz Theorem Is Violated in Sequential Mastery Testing}, journal = {Sequential Analysis}, volume = {27}, number = {3}, year = {2008}, pages = {293-303}, author = {Finkelman, M.} } @inbook {1874, title = {Adaptive estimators of trait level in adaptive testing: Some proposals}, year = {2007}, note = {{PDF file, 125 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Ra{\^\i}che, G. and Blais, J. G. and Magis, D.} } @inbook {1895, title = {Adaptive testing with the multi-unidimensional pairwise preference model}, year = {2007}, note = {{PDF file, 145 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Stark, S. and Chernyshenko, O. S.} } @article {123, title = {Applying item response theory and computer adaptive testing: The challenges for health outcomes assessment}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl 1}, year = {2007}, note = {Fayers, Peter MNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:187-94. Epub 2007 Apr 7.}, pages = {187-194}, edition = {2007/04/10}, abstract = {OBJECTIVES: We review the papers presented at the NCI/DIA conference, to identify areas of controversy and uncertainty, and to highlight those aspects of item response theory (IRT) and computer adaptive testing (CAT) that require theoretical or empirical research in order to justify their application to patient reported outcomes (PROs). BACKGROUND: IRT and CAT offer exciting potential for the development of a new generation of PRO instruments. However, most of the research into these techniques has been in non-healthcare settings, notably in education. Educational tests are very different from PRO instruments, and consequently problematic issues arise when adapting IRT and CAT to healthcare research. RESULTS: Clinical scales differ appreciably from educational tests, and symptoms have characteristics distinctly different from examination questions. This affects the transferring of IRT technology.
Particular areas of concern when applying IRT to PROs include inadequate software, difficulties in selecting models and communicating results, insufficient testing of local independence and other assumptions, and a need of guidelines for estimating sample size requirements. Similar concerns apply to differential item functioning (DIF), which is an important application of IRT. Multidimensional IRT is likely to be advantageous only for closely related PRO dimensions. CONCLUSIONS: Although IRT and CAT provide appreciable potential benefits, there is a need for circumspection. Not all PRO scales are necessarily appropriate targets for this methodology. Traditional psychometric methods, and especially qualitative methods, continue to have an important role alongside IRT. Research should be funded to address the specific concerns that have been identified.}, isbn = {0962-9343 (Print)}, author = {Fayers, P. M.} } @article {2174, title = {Automated Simultaneous Assembly of Multistage Testlets for a High-Stakes Licensing Examination}, journal = {Educational and Psychological Measurement}, volume = {67}, number = {1}, year = {2007}, pages = {5-20}, abstract = {

Many challenges exist for high-stakes testing programs offering continuous computerized administration. The automated assembly of test questions to exactly meet content and other requirements, provide uniformity, and control item exposure can be modeled and solved by mixed-integer programming (MIP) methods. A case study of the computerized licensing examination of the American Institute of Certified Public Accountants is offered as one application of MIP techniques for test assembly. The solution illustrates assembly for a computer-adaptive multistage testing design. However, the general form of the constraint-based solution can be modified to generate optimal test designs for paper-based or computerized administrations, regardless of the specific psychometric model. An extension of this methodology allows for long-term planning for the production and use of test content on the basis of exact psychometric test designs and administration schedules.

}, doi = {10.1177/0013164406288162}, url = {http://epm.sagepub.com/content/67/1/5.abstract}, author = {Breithaupt, Krista and Hare, Donovan R.} } @article {2233, title = {A {\textquotedblleft}Rearrangement Procedure{\textquotedblright} For Scoring Adaptive Tests with Review Options}, journal = {International Journal of Testing}, volume = {7}, number = {4}, year = {2007}, pages = {387-407}, doi = {10.1080/15305050701632262}, url = {http://www.tandfonline.com/doi/abs/10.1080/15305050701632262}, author = {Papanastasiou, Elena C. and Reckase, Mark D.} } @inbook {1882, title = {Bundle models for computerized adaptive testing in e-learning assessment}, year = {2007}, note = {{PDF file, 426 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Scalise, K. and Wilson, M.} } @inbook {1785, title = {CAT Security: A practitioner{\textquoteright}s perspective}, year = {2007}, note = {{PDF file, 104 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Guo, F.} } @inbook {1764, title = {Choices in CAT models in the context of educational testing}, year = {2007}, note = {{PDF file, 123 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Theo Eggen} } @conference {114, title = {Choices in CAT models in the context of educational testing}, booktitle = {GMAC Conference on Computerized Adaptive Testing}, year = {2007}, month = {June 7, 2007}, publisher = {Graduate Management Admission Council}, organization = {Graduate Management Admission Council}, address = {St. Paul, MN}, author = {Theo Eggen} } @inbook {1806, title = {Comparison of computerized adaptive testing and classical methods for measuring individual change}, year = {2007}, note = {{PDF file, 347 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Kim-Kang, G. and Weiss, D. J.} } @article {498, title = {The comparison of maximum likelihood estimation and expected a posteriori in CAT using the graded response model}, journal = {Journal of Elementary Education}, volume = {19}, year = {2007}, pages = {339-371}, author = {Chen, S-K.} } @book {1703, title = {A comparison of two methods of polytomous computerized classification testing for multiple cutscores}, year = {2007}, note = {{PDF file, 363 KB}}, address = {Unpublished doctoral dissertation, University of Minnesota}, author = {Thompson, N. A.} } @article {135, title = {Computerized adaptive personality testing: A review and illustration with the MMPI-2 Computerized Adaptive Version}, journal = {Psychological Assessment}, volume = {19}, number = {1}, year = {2007}, note = {Forbey, Johnathan DBen-Porath, Yossef SResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesPsychological assessmentPsychol Assess. 2007 Mar;19(1):14-24.}, month = {Mar}, pages = {14-24}, edition = {2007/03/21}, abstract = {Computerized adaptive testing in personality assessment can improve efficiency by significantly reducing the number of items administered to answer an assessment question. Two approaches have been explored for adaptive testing in computerized personality assessment: item response theory and the countdown method.
In this article, the authors review the literature on each and report the results of an investigation designed to explore the utility, in terms of item and time savings, and validity, in terms of correlations with external criterion measures, of an expanded countdown method-based research version of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2), the MMPI-2 Computerized Adaptive Version (MMPI-2-CA). Participants were 433 undergraduate college students (170 men and 263 women). Results indicated considerable item savings and corresponding time savings for the adaptive testing modalities compared with a conventional computerized MMPI-2 administration. Furthermore, computerized adaptive administration yielded comparable results to computerized conventional administration of the MMPI-2 in terms of both test scores and their validity. Future directions for computerized adaptive personality testing are discussed.}, keywords = {Adolescent, Adult, Diagnosis, Computer-Assisted/*statistics \& numerical data, Female, Humans, Male, MMPI/*statistics \& numerical data, Personality Assessment/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Reference Values, Reproducibility of Results}, isbn = {1040-3590 (Print)}, author = {Forbey, J. D. and Ben-Porath, Y. S.} } @article {210, title = {Computerized adaptive testing for measuring development of young children}, journal = {Statistics in Medicine}, volume = {26}, number = {13}, year = {2007}, note = {Jacobusse, GertBuuren, Stef vanEnglandStatistics in medicineStat Med. 2007 Jun 15;26(13):2629-38.}, month = {Jun 15}, pages = {2629-38}, edition = {2006/11/30}, abstract = {Developmental indicators that are used for routine measurement in The Netherlands are usually chosen to optimally identify delayed children. Measurements on the majority of children without problems are therefore quite imprecise. This study explores the use of computerized adaptive testing (CAT) to monitor the development of young children. CAT is expected to improve the measurement precision of the instrument. We do two simulation studies - one with real data and one with simulated data - to evaluate the usefulness of CAT. It is shown that CAT selects developmental indicators that maximally match the individual child, so that all children can be measured to the same precision.}, keywords = {*Child Development, *Models, Statistical, Child, Preschool, Diagnosis, Computer-Assisted/*statistics \& numerical data, Humans, Netherlands}, isbn = {0277-6715 (Print)}, author = {Jacobusse, G. and Buuren, S.} } @article {199, title = {Computerized adaptive testing for polytomous motivation items: Administration mode effects and a comparison with short forms}, journal = {Applied Psychological Measurement}, volume = {31}, number = {5}, year = {2007}, note = {10.1177/0146621606297314Journal; Peer Reviewed Journal; Journal Article}, pages = {412-429}, abstract = {In a randomized experiment (n=515), a computerized and a computerized adaptive test (CAT) are compared. The item pool consists of 24 polytomous motivation items. Although items are carefully selected, calibration data show that Samejima{\textquoteright}s graded response model did not fit the data optimally. A simulation study is done to assess possible consequences of model misfit. CAT efficiency was studied by a systematic comparison of the CAT with two types of conventional fixed length short forms, which are created to be good CAT competitors. Results showed no essential administration mode effects. 
Efficiency analyses show that CAT outperformed the short forms in almost all aspects when results are aggregated along the latent trait scale. The real and the simulated data results are very similar, which indicates that the real data results are not affected by model misfit. (PsycINFO Database Record (c) 2007 APA ) (journal abstract)}, keywords = {2220 Tests \& Testing, Adaptive Testing, Attitude Measurement, computer adaptive testing, Computer Assisted Testing, items, Motivation, polytomous motivation, Statistical Validity, Test Administration, Test Forms, Test Items}, isbn = {0146-6216}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {2148, title = {Computerized Adaptive Testing for Polytomous Motivation Items: Administration Mode Effects and a Comparison With Short Forms}, journal = {Applied Psychological Measurement}, volume = {31}, number = {5}, year = {2007}, pages = {412-429}, abstract = {

In a randomized experiment (n = 515), a computerized and a computerized adaptive test (CAT) are compared. The item pool consists of 24 polytomous motivation items. Although items are carefully selected, calibration data show that Samejima{\textquoteright}s graded response model did not fit the data optimally. A simulation study is done to assess possible consequences of model misfit. CAT efficiency was studied by a systematic comparison of the CAT with two types of conventional fixed length short forms, which are created to be good CAT competitors. Results showed no essential administration mode effects. Efficiency analyses show that CAT outperformed the short forms in almost all aspects when results are aggregated along the latent trait scale. The real and the simulated data results are very similar, which indicates that the real data results are not affected by model misfit.

}, doi = {10.1177/0146621606297314}, url = {http://apm.sagepub.com/content/31/5/412.abstract}, author = {Hol, A. Michiel and Vorst, Harrie C. M. and Mellenbergh, Gideon J.} } @inbook {1949, title = {Computerized adaptive testing with the bifactor model}, year = {2007}, note = {{PDF file, 159 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Weiss, D. J. and Gibbons, R. D.} } @inbook {1962, title = {Computerized attribute-adaptive testing: A new computerized adaptive testing approach incorporating cognitive psychology}, year = {2007}, note = {{PDF file, 296 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Zhou, J. and Gierl, M. J. and Cui, Y.} } @inbook {1913, title = {Computerized classification testing with composite hypotheses}, year = {2007}, note = {{PDF file, 96 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Thompson, N. A. and Ro, S.} } @proceedings {389, title = {Computerized classification testing with composite hypotheses}, journal = {GMAC Conference on Computerized Adaptive Testing}, year = {2007}, note = {Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing. Retrieved [date] from www.psych.umn.edu/psylabs/CATCentral}, publisher = {Graduate Management Admissions Council}, address = {St. Paul, MN}, keywords = {computerized adaptive testing}, author = {Thompson, N. A. and Ro, S.} } @article {2176, title = {Computerizing Organizational Attitude Surveys}, journal = {Educational and Psychological Measurement}, volume = {67}, number = {4}, year = {2007}, pages = {658-678}, abstract = {

Two quasi-experimental field studies were conducted to evaluate the psychometric equivalence of computerized and paper-and-pencil job satisfaction measures. The present research extends previous work in the area by providing better control of common threats to validity in quasi-experimental research on test mode effects and by evaluating a more comprehensive measurement model for job attitudes. Results of both studies demonstrated substantial equivalence of the computerized measure with the paper-and-pencil version. Implications for the practical use of computerized organizational attitude surveys are discussed.

}, doi = {10.1177/0013164406292084}, url = {http://epm.sagepub.com/content/67/4/658.abstract}, author = {Mueller, Karsten and Liebig, Christian and Hattrup, Keith} } @article {2241, title = {Conditional Item-Exposure Control in Adaptive Testing Using Item-Ineligibility Probabilities}, journal = {Journal of Educational and Behavioral Statistics}, volume = {32}, number = {4}, year = {2007}, pages = {398-418}, abstract = {

Two conditional versions of the exposure-control method with item-ineligibility constraints for adaptive testing in van der Linden and Veldkamp (2004) are presented. The first version is for unconstrained item selection, the second for item selection with content constraints imposed by the shadow-test approach. In both versions, the exposure rates of the items are controlled using probabilities of item ineligibility given $\theta$ that adapt the exposure rates automatically to a goal value for the items in the pool. In an extensive empirical study with an adaptive version of the Law School Admission Test, the authors show how the method can be used to drive conditional exposure rates below goal values as low as 0.025. Obviously, the price to be paid for minimal exposure rates is a decrease in the accuracy of the ability estimates. This trend is illustrated with empirical data.

}, doi = {10.3102/1076998606298044}, url = {http://jeb.sagepub.com/cgi/content/abstract/32/4/398}, author = {van der Linden, Wim J. and Veldkamp, Bernard P.} } @conference {1122, title = {Cutscore location and classification accuracy in computerized classification testing}, booktitle = {Paper presented at the international meeting of the Psychometric Society}, year = {2007}, note = {{PDF file, 94 KB}}, address = {Tokyo, Japan}, author = {Ro, S. and Thompson, N. A.} } @article {730, title = {The design and evaluation of a computerized adaptive test on mobile devices}, journal = {Computers \& Education}, volume = {49}, year = {2007}, author = {Triantafillou, E. and Georgiadou, E. and Economides, A. A.} } @inbook {1877, title = {The design of p-optimal item banks for computerized adaptive tests}, year = {2007}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing. {PDF file, 211 KB}.}, author = {Reckase, M. D.} } @inbook {1783, title = {Designing optimal item pools for computerized adaptive tests with Sympson-Hetter exposure control}, year = {2007}, note = {{PDF file, 3 MB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing}, author = {Gu, L. and Reckase, M. D.} } @inbook {1867, title = {Designing templates based on a taxonomy of innovative items}, year = {2007}, note = {{PDF file, 149 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Parshall, C. G. and Harmes, J. C.} } @article {2199, title = {Detecting Differential Speededness in Multistage Testing}, journal = {Journal of Educational Measurement}, volume = {44}, number = {2}, year = {2007}, pages = {117{\textendash}130}, abstract = {

A potential undesirable effect of multistage testing is differential speededness, which happens if some of the test takers run out of time because they receive subtests with items that are more time intensive than others. This article shows how a probabilistic response-time model can be used for estimating differences in time intensities and speed between subtests and test takers and detecting differential speededness. An empirical data set for a multistage test in the computerized CPA Exam was used to demonstrate the procedures. Although the more difficult subtests appeared to have items that were more time intensive than the easier subtests, an analysis of the residual response times did not reveal any significant differential speededness because the time limit appeared to be appropriate. In a separate analysis, within each of the subtests, we found minor but consistent patterns of residual times that are believed to be due to a warm-up effect, that is, use of more time on the initial items than they actually need.

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2007.00030.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2007.00030.x}, author = {van der Linden, Wim J. and Breithaupt, Krista and Chuah, Siang Chee and Zhang, Yanwei} } @article {29, title = {Developing tailored instruments: item banking and computerized adaptive assessment}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl 1}, year = {2007}, note = {Bjorner, Jakob BueChang, Chih-HungThissen, DavidReeve, Bryce B1R43NS047763-01/NS/United States NINDSAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:95-108. Epub 2007 Feb 15.}, pages = {95-108}, edition = {2007/05/29}, abstract = {Item banks and Computerized Adaptive Testing (CAT) have the potential to greatly improve the assessment of health outcomes. This review describes the unique features of item banks and CAT and discusses how to develop item banks. In CAT, a computer selects the items from an item bank that are most relevant for and informative about the particular respondent; thus optimizing test relevance and precision. Item response theory (IRT) provides the foundation for selecting the items that are most informative for the particular respondent and for scoring responses on a common metric. The development of an item bank is a multi-stage process that requires a clear definition of the construct to be measured, good items, a careful psychometric analysis of the items, and a clear specification of the final CAT. The psychometric analysis needs to evaluate the assumptions of the IRT model such as unidimensionality and local independence; that the items function the same way in different subgroups of the population; and that there is an adequate fit between the data and the chosen item response models. Also, interpretation guidelines need to be established to help the clinical application of the assessment. Although medical research can draw upon expertise from educational testing in the development of item banks and CAT, the medical field also encounters unique opportunities and challenges.}, keywords = {*Health Status, *Health Status Indicators, *Mental Health, *Outcome Assessment (Health Care), *Quality of Life, *Questionnaires, *Software, Algorithms, Factor Analysis, Statistical, Humans, Models, Statistical, Psychometrics}, isbn = {0962-9343 (Print)}, author = {Bjorner, J. B. and Chang, C-H. and Thissen, D. and Reeve, B. B.} } @article {782, title = {Development and evaluation of a computer adaptive test for {\textquotedblleft}Anxiety{\textquotedblright} (Anxiety-CAT)}, journal = {Quality of Life Research}, volume = {16}, year = {2007}, pages = {143-155}, author = {Walter, O. B. and Becker, J. and Bjorner, J. B. and Fliege, H. and Klapp, B. F. and Rose, M.} } @inbook {1763, title = {The development of a computerized adaptive test for integrity}, year = {2007}, note = {{PDf file, 290 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Egberink, I. J. L. and Veldkamp, B. P.} } @inbook {1898, title = {Development of a multiple-component CAT for measuring foreign language proficiency (SIMTEST)}, year = {2007}, note = {{PDF file, 258 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Sumbling, M. and Sanz, P. and Viladrich, M. C. and Doval, E. 
and Riera, L.} } @article {125, title = {The effect of including pretest items in an operational computerized adaptive test: Do different ability examinees spend different amounts of time on embedded pretest items?}, journal = {Educational Assessment}, volume = {12}, number = {2}, year = {2007}, pages = {161-173}, publisher = {Lawrence Erlbaum: US}, abstract = {The purpose of this study was to examine the effect of pretest items on response time in an operational, fixed-length, time-limited computerized adaptive test (CAT). These pretest items are embedded within the CAT, but unlike the operational items, are not tailored to the examinee{\textquoteright}s ability level. If examinees with higher ability levels need less time to complete these items than do their counterparts with lower ability levels, they will have more time to devote to the operational test questions. Data were from a graduate admissions test that was administered worldwide. Data from both quantitative and verbal sections of the test were considered. For the verbal section, examinees in the lower ability groups spent systematically more time on their pretest items than did those in the higher ability groups, though for the quantitative section the differences were less clear. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {ability, operational computerized adaptive test, pretest items, time}, isbn = {1062-7197 (Print); 1532-6977 (Electronic)}, author = {Ferdous, A. A. and Plake, B. S. and Chang, S-R.} } @booklet {316, title = {The effect of using item parameters calibrated from paper administrations in computer adaptive test administrations}, journal = {Journal of Technology, Learning, and Assessment}, volume = {5}, number = {7}, year = {2007}, pages = {1-29}, abstract = {Computer administered tests are becoming increasingly prevalent as computer technology becomes more readily available on a large scale. For testing programs that utilize both computer and paper administrations, mode effects are problematic in that they can result in examinee scores that are artificially inflated or deflated. As such, researchers have engaged in extensive studies of whether scores differ across paper and computer presentations of the same tests. The research generally seems to indicate that the more complicated it is to present or take a test on computer, the greater the possibility of mode effects. In a computer adaptive test, mode effects may be a particular concern if items are calibrated using item responses obtained from one administration mode (i.e., paper), and those parameters are then used operationally in a different administration mode (i.e., computer). This paper studies the suitability of using parameters calibrated from a paper administration for item selection and scoring in a computer adaptive administration, for two tests with lengthy passages that required navigation in the computer administration. The results showed that the use of paper calibrated parameters versus computer calibrated parameters in computer adaptive administrations had small to moderate effects on the reliability of examinee scores, at fairly short test lengths. This effect was generally diminished for longer test lengths. 
However, the results suggest that in some cases, some loss in reliability might be inevitable if paper-calibrated parameters are used in computer adaptive administrations.}, keywords = {Mode effects}, author = {Pommerich, M.} } @article {2075, title = {The Effect of Using Item Parameters Calibrated from Paper Administrations in Computer Adaptive Test Administrations}, journal = {The Journal of Technology, Learning, and Assessment}, volume = {5}, number = {7}, year = {2007}, abstract = {

Computer administered tests are becoming increasingly prevalent as computer technology becomes more readily available on a large scale. For testing programs that utilize both computer and paper administrations, mode effects are problematic in that they can result in examinee scores that are artificially inflated or deflated. As such, researchers have engaged in extensive studies of whether scores differ across paper and computer presentations of the same tests. The research generally seems to indicate that the more complicated it is to present or take a test on computer, the greater the possibility of mode effects. In a computer adaptive test, mode effects may be a particular concern if items are calibrated using item responses obtained from one administration mode (i.e., paper), and those parameters are then used operationally in a different administration mode (i.e., computer). This paper studies the suitability of using parameters calibrated from a paper administration for item selection and scoring in a computer adaptive administration, for two tests with lengthy passages that required navigation in the computer administration. The results showed that the use of paper calibrated parameters versus computer calibrated parameters in computer adaptive administrations had small to moderate effects on the reliability of examinee scores, at fairly short test lengths. This effect was generally diminished for longer test lengths. However, the results suggest that in some cases, some loss in reliability might be inevitable if paper-calibrated parameters are used in computer adaptive administrations.

}, author = {Pommerich, M.} } @article {2196, title = {Estimating the Standard Error of the Maximum Likelihood Ability Estimator in Adaptive Testing Using the Posterior-Weighted Test Information Function}, journal = {Educational and Psychological Measurement}, volume = {67}, number = {6}, year = {2007}, pages = {958-975}, abstract = {

The standard error of the maximum likelihood ability estimator is commonly estimated by evaluating the test information function at an examinee{\textquoteright}s current maximum likelihood estimate (a point estimate) of ability. Because the test information function evaluated at the point estimate may differ from the test information function evaluated at an examinee{\textquoteright}s true ability value, the estimated standard error may be biased under certain conditions. This is of particular concern in adaptive testing because the height of the test information function is expected to be higher at the current estimate of ability than at the actual value of ability. This article proposes using the posterior-weighted test information function in computing the standard error of the maximum likelihood ability estimator for adaptive test sessions. A simulation study showed that the proposed approach provides standard error estimates that are less biased and more efficient than those provided by the traditional point estimate approach.

}, doi = {10.1177/0013164407301544}, url = {http://epm.sagepub.com/content/67/6/958.abstract}, author = {Penfield, Randall D.} } @article {111, title = {Evaluation of computer adaptive testing systems}, journal = {International Journal of Web-Based Learning and Teaching Technologies}, volume = {2}, number = {1}, year = {2007}, pages = {70-87}, publisher = {IGI Global: US}, abstract = {Many educational organizations are trying to reduce the cost of the exams, the workload and delay of scoring, and the human errors. Also, they try to increase the accuracy and efficiency of the testing. Recently, most examination organizations use computer adaptive testing (CAT) as the method for large scale testing. This article investigates the current state of CAT systems and identifies their strengths and weaknesses. It evaluates 10 CAT systems using an evaluation framework of 15 domains categorized into three dimensions: educational, technical, and economical. The results show that the majority of the CAT systems give priority to security, reliability, and maintainability. However, they do not offer to the examinee any advanced support and functionalities. Also, the feedback to the examinee is limited and the presentation of the items is poor. Recommendations are made in order to enhance the overall quality of a CAT system. For example, alternative multimedia items should be available so that the examinee would choose a preferred media type. Feedback could be improved by providing more information to the examinee or providing information anytime the examinee wished. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive testing systems, examination organizations, systems evaluation}, isbn = {1548-1093 (Print); 1548-1107 (Electronic)}, author = {Economides, A. A. and Roupas, C.} } @article {163, title = {An exploration and realization of computerized adaptive testing with cognitive diagnosis}, journal = {Acta Psychologica Sinica}, volume = {39}, number = {4}, year = {2007}, pages = {747-753}, abstract = {Increased attention to {\textquotedblleft}cognitive bugs behavior{\textquotedblright} appears to have led to increased research interest in diagnostic testing based on Item Response Theory (IRT), which combines cognitive psychology and psychometrics. The study of cognitive diagnosis has been applied mainly to Paper-and-Pencil (P\&P) testing; rarely has it been applied to computerized adaptive testing (CAT). To our knowledge, no research on CAT with cognitive diagnosis has been conducted in China. Since CAT is more efficient and accurate than P\&P testing, it is important to develop an application technique for cognitive diagnosis suitable for CAT. This study attempts to construct a preliminary CAT system for cognitive diagnosis. With the help of the {\textquotedblleft}Diagnosis first, Ability estimation second{\textquotedblright} method, the knowledge state conversion diagram was used to describe all the possible knowledge states in a domain of interest and the relations among the knowledge states at the diagnosis stage, where a new strategy of item selection based on the Depth First Search algorithm was proposed. On the other hand, those items that contain attributes which the examinee has not mastered were removed in ability estimation.
At the stage of accurate ability estimation, all the items answered by each examinee not only matched his/her estimated ability value, but also were limited to those items whose attributes had been mastered by the examinee. We used Monte Carlo simulation to generate all the data for the three different structures of cognitive attributes in this study. These structures were tree-shaped, forest-shaped, and isolated vertices (related to a simple Q-matrix). Both the tree-shaped and isolated vertices structures were derived from actual cases, while the forest-shaped structure was a generalized simulation. 3000 examinees and 3000 items were simulated in the tree-shaped experiment, 2550 examinees and 3100 items in the forest-shaped experiment, and 2000 examinees and 2500 items in the isolated vertices experiment. The maximum test length was set at 30 items for all experiments. The difficulty parameters and the logarithm of the discrimination parameters were drawn from the standard normal distribution N(0,1). There were 100 examinees of each attribute pattern in the tree-shaped experiment and 50 examinees of each attribute pattern in the forest-shaped experiment. In the isolated vertices experiment, the 2000 examinees were students from an actual case. To assess the behavior of the proposed diagnostic approach, three assessment indices were used: the attribute pattern classification agreement rate (APCAR), the Recovery (the average absolute deviation between the estimated value and the true value), and the average test length (Length). Partial results of the Monte Carlo study were as follows. For the tree-shaped attribute structure, APCAR was 84.27\%, Recovery was 0.17, and Length was 24.80. For the forest-shaped attribute structure, APCAR was 84.02\%, Recovery was 0.172, and Length was 23.47. For the isolated vertices attribute structure, APCAR was 99.16\%, Recovery was 0.256, and Length was 27.32. As shown above, the results are favorable: the rate of cognitive diagnosis accuracy exceeded 80\% in each experiment, and the Recovery is also good. Therefore, it should be acceptable to construct an initial CAT system for cognitive diagnosis using the {\textquotedblleft}Diagnosis first, Ability estimation second{\textquotedblright} method with the help of both the knowledge state conversion diagram and the new item selection strategy based on the Depth First Search algorithm}, author = {Haijing, L. and Shuliang, D.} } @inbook {1762, title = {Exploring potential designs for multi-form structure computerized adaptive tests with uniform item exposure}, year = {2007}, note = {{PDF file, 295 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Edwards, M. C. and Thissen, D.} } @article {49, title = {The future of outcomes measurement: item banking, tailored short-forms, and computerized adaptive assessment}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl 1}, year = {2007}, pages = {133-141}, abstract = {The use of item banks and computerized adaptive testing (CAT) begins with clear definitions of important outcomes, and references those definitions to specific questions gathered into large and well-studied pools, or {\textquotedblleft}banks{\textquotedblright} of items. Items can be selected from the bank to form customized short scales, or can be administered in a sequence and length determined by a computer programmed for precision and clinical relevance.
Although far from perfect, such item banks can form a common definition and understanding of human symptoms and functional problems such as fatigue, pain, depression, mobility, social function, sensory function, and many other health concepts that we can only measure by asking people directly. The support of the National Institutes of Health (NIH), as witnessed by its cooperative agreement with measurement experts through the NIH Roadmap Initiative known as PROMIS (www.nihpromis.org), is a big step in that direction. Our approach to item banking and CAT is practical; as focused on application as it is on science or theory. From a practical perspective, we frequently must decide whether to re-write and retest an item, add more items to fill gaps (often at the ceiling of the measure), re-test a bank after some modifications, or split up a bank into units that are more unidimensional, yet less clinically relevant or complete. These decisions are not easy, and yet they are rarely unforgiving. We encourage people to build practical tools that are capable of producing multiple short form measures and CAT administrations from common banks, and to further our understanding of these banks with various clinical populations and ages, so that with time the scores that emerge from these many activities begin to have not only a common metric and range, but a shared meaning and understanding across users. In this paper, we provide an overview of item banking and CAT, discuss our approach to item banking and its byproducts, describe testing options, discuss an example of CAT for fatigue, and discuss models for long term sustainability of an entity such as PROMIS. Some barriers to success include limitations in the methods themselves, controversies and disagreements across approaches, and end-user reluctance to move away from the familiar. }, isbn = {0962-9343}, author = {Cella, D. and Gershon, R. C. and Lai, J-S. and Choi, S. W.} } @article {2271, title = {Hypothetischer Einsatz adaptiven Testens bei der Messung von Bildungsstandards in Mathematik [Hypothetical use of adaptive testing for the measurement of educational standards in mathematics] . }, journal = {Zeitschrift f{\"u}r Erziehungswissenschaft}, volume = {8}, year = {2007}, pages = {169-184}, author = {Frey, A. and Ehmke, T.} } @inbook {1810, title = {ICAT: An adaptive testing procedure to allow the identification of idiosyncratic knowledge patterns}, year = {2007}, note = {{PDF file, 161 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Kingsbury, G. G. and Houser, R.L.} } @inbook {1878, title = {Implementing the Graduate Management Admission Test{\textregistered} computerized adaptive test}, year = {2007}, note = {{PDF file, 135 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Rudner, L. M.} } @article {52, title = {Improving patient reported outcomes using item response theory and computerized adaptive testing}, journal = {Journal of Rheumatology}, volume = {34}, number = {6}, year = {2007}, note = {Chakravarty, Eliza FBjorner, Jakob BFries, James FAr052158/ar/niamsConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 
2007 Jun;34(6):1426-31.}, month = {Jun}, pages = {1426-31}, edition = {2007/06/07}, abstract = {OBJECTIVE: Patient reported outcomes (PRO) are considered central outcome measures for both clinical trials and observational studies in rheumatology. More sophisticated statistical models, including item response theory (IRT) and computerized adaptive testing (CAT), will enable critical evaluation and reconstruction of currently utilized PRO instruments to improve measurement precision while reducing item burden on the individual patient. METHODS: We developed a domain hierarchy encompassing the latent trait of physical function/disability from the more general to most specific. Items collected from 165 English-language instruments were evaluated by a structured process including trained raters, modified Delphi expert consensus, and then patient evaluation. Each item in the refined data bank will undergo extensive analysis using IRT to evaluate response functions and measurement precision. CAT will allow for real-time questionnaires of potentially smaller numbers of questions tailored directly to each individual{\textquoteright}s level of physical function. RESULTS: Physical function/disability domain comprises 4 subdomains: upper extremity, trunk, lower extremity, and complex activities. Expert and patient review led to consensus favoring use of present-tense "capability" questions using a 4- or 5-item Likert response construct over past-tense "performance"items. Floor and ceiling effects, attribution of disability, and standardization of response categories were also addressed. CONCLUSION: By applying statistical techniques of IRT through use of CAT, existing PRO instruments may be improved to reduce questionnaire burden on the individual patients while increasing measurement precision that may ultimately lead to reduced sample size requirements for costly clinical trials.}, keywords = {*Rheumatic Diseases/physiopathology/psychology, Clinical Trials, Data Interpretation, Statistical, Disability Evaluation, Health Surveys, Humans, International Cooperation, Outcome Assessment (Health Care)/*methods, Patient Participation/*methods, Research Design/*trends, Software}, isbn = {0315-162X (Print)}, author = {Chakravarty, E. F. and Bjorner, J. B. and Fries, J.F.} } @article {363, title = {The initial development of an item bank to assess and screen for psychological distress in cancer patients}, journal = {Psycho-Oncology}, volume = {16}, number = {8}, year = {2007}, note = {10.1002/pon.1117Journal; Peer Reviewed Journal; Journal Article}, pages = {724-732}, abstract = {Psychological distress is a common problem among cancer patients. Despite the large number of instruments that have been developed to assess distress, their utility remains disappointing. This study aimed to use Rasch models to develop an item-bank which would provide the basis for better means of assessing psychological distress in cancer patients. An item bank was developed from eight psychological distress questionnaires using Rasch analysis to link common items. Items from the questionnaires were added iteratively with common items as anchor points and misfitting items (infit mean square > 1.3) removed, and unidimensionality assessed. A total of 4914 patients completed the questionnaires providing an initial pool of 83 items. Twenty items were removed resulting in a final pool of 63 items. Good fit was demonstrated and no additional factor structure was evident from the residuals. 
However, there was little overlap between item locations and person measures, since items mainly targeted higher levels of distress. The Rasch analysis allowed items to be pooled and generated a unidimensional instrument for measuring psychological distress in cancer patients. Additional items are required to more accurately assess patients across the whole continuum of psychological distress. (PsycINFO Database Record (c) 2007 APA ) (journal abstract)}, keywords = {3293 Cancer, cancer patients, Distress, initial development, Item Response Theory, Models, Neoplasms, Patients, Psychological, psychological distress, Rasch, Stress}, isbn = {1057-9249}, author = {Smith, A. B. and Rush, R. and Velikova, G. and Wall, L. and Wright, E. P. and Stark, D. and Selby, P. and Sharpe, M.} } @inbook {1914, title = {Investigating CAT designs to achieve comparability with a paper test}, year = {2007}, note = {{PDF file, 141 KB}}, address = {In D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Thompson, T. and Way, W. D.} } @article {86, title = {IRT health outcomes data analysis project: an overview and summary}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl. 1}, year = {2007}, note = {Cook, Karon FTeal, Cayla RBjorner, Jakob BCella, DavidChang, Chih-HungCrane, Paul KGibbons, Laura EHays, Ron DMcHorney, Colleen AOcepek-Welikson, KatjaRaczek, Anastasia ETeresi, Jeanne AReeve, Bryce B1U01AR52171-01/AR/United States NIAMSR01 (CA60068)/CA/United States NCIY1-PC-3028-01/PC/United States NCIResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:121-32. Epub 2007 Mar 10.}, pages = {121-132}, edition = {2007/03/14}, abstract = {BACKGROUND: In June 2004, the National Cancer Institute and the Drug Information Association co-sponsored the conference, "Improving the Measurement of Health Outcomes through the Applications of Item Response Theory (IRT) Modeling: Exploration of Item Banks and Computer-Adaptive Assessment." A component of the conference was presentation of a psychometric and content analysis of a secondary dataset. OBJECTIVES: A thorough psychometric and content analysis was conducted of two primary domains within a cancer health-related quality of life (HRQOL) dataset. RESEARCH DESIGN: HRQOL scales were evaluated using factor analysis for categorical data, IRT modeling, and differential item functioning analyses. In addition, computerized adaptive administration of HRQOL item banks was simulated, and various IRT models were applied and compared. SUBJECTS: The original data were collected as part of the NCI-funded Quality of Life Evaluation in Oncology (Q-Score) Project. A total of 1,714 patients with cancer or HIV/AIDS were recruited from 5 clinical sites. MEASURES: Items from 4 HRQOL instruments were evaluated: Cancer Rehabilitation Evaluation System-Short Form, European Organization for Research and Treatment of Cancer Quality of Life Questionnaire, Functional Assessment of Cancer Therapy and Medical Outcomes Study Short-Form Health Survey. RESULTS AND CONCLUSIONS: Four lessons learned from the project are discussed: the importance of good developmental item banks, the ambiguity of model fit results, the limits of our knowledge regarding the practical implications of model misfit, and the importance in the measurement of HRQOL of construct definition. 
With respect to these lessons, areas for future research are suggested. The feasibility of developing item banks for broad definitions of health is discussed.}, keywords = {*Data Interpretation, Statistical, *Health Status, *Quality of Life, *Questionnaires, *Software, Female, HIV Infections/psychology, Humans, Male, Neoplasms/psychology, Outcome Assessment (Health Care)/*methods, Psychometrics, Stress, Psychological}, isbn = {0962-9343 (Print)}, author = {Cook, K. F. and Teal, C. R. and Bjorner, J. B. and Cella, D. and Chang, C-H. and Crane, P. K. and Gibbons, L. E. and Hays, R. D. and McHorney, C. A. and Ocepek-Welikson, K. and Raczek, A. E. and Teresi, J. A. and Reeve, B. B.} } @conference {1202, title = {Item selection in computerized classification testing}, booktitle = {Paper presented at the Conference on High Stakes Testing}, year = {2007}, note = {{PDF file, 87 KB}}, address = {University of Nebraska}, author = {Thompson, N. A.} } @article {386, title = {Methodological issues for building item banks and computerized adaptive scales}, journal = {Quality of Life Research}, volume = {16}, number = {S1}, year = {2007}, pages = {109-119}, abstract = {This paper reviews important methodological considerations for developing item banks and computerized adaptive scales (commonly called computerized adaptive tests in the educational measurement literature, yielding the acronym CAT), including issues of the reference population, dimensionality, dichotomous versus polytomous response scales, differential item functioning (DIF) and conditional scoring, mode effects, the impact of local dependence, and innovative approaches to assessment using CATs in health outcomes research.}, isbn = {0962-9343; 1573-2649}, author = {Thissen, D. and Reeve, B. B. and Bjorner, J. B. and Chang, C-H.} } @article {18, title = {Methods for restricting maximum exposure rate in computerized adaptive testing}, journal = {Methodology: European Journal of Research Methods for the Behavioral and Social Sciences}, volume = {3}, number = {1}, year = {2007}, pages = {14-23}, publisher = {Hogrefe \& Huber Publishers GmbH: Germany}, abstract = {The Sympson-Hetter (1985) method provides a means of controlling the maximum exposure rate of items in Computerized Adaptive Testing. Through a series of simulations, control parameters are set that mark the probability of administration of an item on being selected. This method presents two main problems: it requires a long computation time for calculating the parameters and the maximum exposure rate is slightly above the fixed limit. Van der Linden (2003) presented two alternatives which appear to solve both of the problems. The impact of these methods on measurement accuracy has not been tested yet. We show how these methods over-restrict the exposure of some highly discriminating items and, thus, the accuracy is decreased. It is also shown that, when the desired maximum exposure rate is near the minimum possible value, these methods offer an empirical maximum exposure rate clearly above the goal. A new method, based on the initial estimation of the probability of administration and the probability of selection of the items with the restricted method (Revuelta \& Ponsoda, 1998), is presented in this paper. It can be used with the Sympson-Hetter method and with the two van der Linden methods. This option, when used with Sympson-Hetter, speeds the convergence of the control parameters without decreasing the accuracy.
(PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, item bank security, item exposure control, overlap rate, Sympson-Hetter method}, isbn = {1614-1881 (Print); 1614-2241 (Electronic)}, author = {Barrada, J. and Olea, J. and Ponsoda, V.} } @inbook {1745, title = {The modified maximum global discrimination index method for cognitive diagnostic computerized adaptive testing}, year = {2007}, note = {{PDF file, 172 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Cheng, Y. and Chang, Hua-Hua} } @article {2208, title = {A multiple objective test assembly approach for exposure control problems in computerized adaptive testing}, number = {2007-1}, year = {2007}, institution = {Cito}, address = {Arnhem, The Netherlands}, author = {Veldkamp, B. P. and Verschoor, Angela J. and Theo Eggen} } @article {2175, title = {Mutual Information Item Selection in Adaptive Classification Testing}, journal = {Educational and Psychological Measurement}, volume = {67}, number = {1}, year = {2007}, pages = {41-58}, abstract = {

A general approach for item selection in adaptive multiple-category classification tests is provided. The approach uses mutual information (MI), a special case of the Kullback-Leibler distance, or relative entropy. MI works efficiently with the sequential probability ratio test and alleviates the difficulties encountered with using other local- and global-information measures in the multiple-category classification setting. Results from simulation studies using three item selection methods, Fisher information (FI), posterior-weighted FI (FIP), and MI, are provided for an adaptive four-category classification test. Both across and within the four classification categories, it is shown that in general, MI item selection classifies the highest proportion of examinees correctly and yields the shortest test lengths. The next best performance is observed for FIP item selection, followed by FI.

}, doi = {10.1177/0013164406288164}, url = {http://epm.sagepub.com/content/67/1/41.abstract}, author = {Weissman, Alexander} } @article {568, title = {An NCME instructional module on multistage testing}, journal = {Educational Measurement: Issues and Practice}, volume = {26}, number = {2}, year = {2007}, note = {$\#$HE07044}, pages = {44-52}, author = {Hendrickson, A.} } @inbook {1865, title = {A new delivery system for CAT}, year = {2007}, note = {{PDF file, 248 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Park, J.} } @inbook {1881, title = {Nonparametric online item calibration}, year = {2007}, note = {{PDF file, 8 MB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Samejima, F.} } @inbook {1754, title = {Partial order knowledge structures for CAT applications}, year = {2007}, note = {{PDF file, 475 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Desmarais, M. C. and Pu, X. and Blais, J-G.} } @inbook {1799, title = {Patient-reported outcomes measurement and computerized adaptive testing: An application of post-hoc simulation to a diagnostic screening instrument}, year = {2007}, note = {{PDF file, 203 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Immekus, J. C. and Gibbons, R. D. and Rush, J. A.} } @article {53, title = {Patient-reported outcomes measurement and management with innovative methodologies and technologies}, journal = {Quality of Life Research}, volume = {16 Suppl 1}, year = {2007}, note = {Chang, Chih-HungR21CA113191/CA/NCI NIH HHS/United StatesResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:157-66. Epub 2007 May 26.}, pages = {157-66}, edition = {2007/05/29}, abstract = {Successful integration of modern psychometrics and advanced informatics in patient-reported outcomes (PRO) measurement and management can potentially maximize the value of health outcomes research and optimize the delivery of quality patient care. Unlike the traditional labor-intensive paper-and-pencil data collection method, item response theory-based computerized adaptive testing methodologies coupled with novel technologies provide an integrated environment to collect, analyze and present ready-to-use PRO data for informed and shared decision-making.
This article describes the needs, challenges and solutions for accurate, efficient and cost-effective PRO data acquisition and dissemination in order to provide critical and timely PRO information necessary to actively support and enhance routine patient care in busy clinical settings.}, keywords = {*Health Status, *Outcome Assessment (Health Care), *Quality of Life, *Software, Computer Systems/*trends, Health Insurance Portability and Accountability Act, Humans, Patient Satisfaction, Questionnaires, United States}, isbn = {0962-9343 (Print); 0962-9343 (Linking)}, author = {Chang, C-H.} } @article {51, title = {The Patient-Reported Outcomes Measurement Information System (PROMIS): progress of an NIH Roadmap cooperative group during its first two years}, journal = {Medical Care}, volume = {45}, number = {5 Suppl 1}, year = {2007}, pages = {S3-S11}, abstract = {The National Institutes of Health (NIH) Patient-Reported Outcomes Measurement Information System (PROMIS) Roadmap initiative (www.nihpromis.org) is a 5-year cooperative group program of research designed to develop, validate, and standardize item banks to measure patient-reported outcomes (PROs) relevant across common medical conditions. In this article, we will summarize the organization and scientific activity of the PROMIS network during its first 2 years.}, author = {Cella, D. and Yount, S. and Rothrock, N. and Gershon, R. C. and Cook, K. F. and Reeve, B. and Ader, D. and Fries, J.F. and Bruce, B. and Rose, M.} } @article {2178, title = {A Practitioner{\textquoteright}s Guide for Variable-length Computerized Classification Testing}, journal = {Practical Assessment Research and Evaluation}, volume = {12}, year = {2007}, pages = {1-13}, author = {Thompson, N. A.} } @article {387, title = {A practitioner{\textquoteright}s guide to variable-length computerized classification testing}, journal = {Practical Assessment, Research and Evaluation}, volume = {12}, number = {1}, year = {2007}, month = {7/1/2009}, chapter = {January, 2007}, abstract = {Variable-length computerized classification tests (CCTs; Lin \& Spray, 2000; Thompson, 2006) are a powerful and efficient approach to testing for the purpose of classifying examinees into groups. CCTs are designed by the specification of at least five technical components: psychometric model, calibrated item bank, starting point, item selection algorithm, and termination criterion. Several options exist for each of these CCT components, creating a myriad of possible designs. Confusion among designs is exacerbated by the lack of a standardized nomenclature. This article outlines the components of a CCT, common options for each component, and the interaction of options for different components, so that practitioners may more efficiently design CCTs. It also offers a suggested nomenclature.}, keywords = {CAT, classification, computer adaptive testing, computerized adaptive testing, Computerized classification testing}, author = {Thompson, N. A.} } @article {583, title = {Prospective evaluation of the AM-PAC-CAT in outpatient rehabilitation settings}, journal = {Physical Therapy}, volume = {87}, year = {2007}, pages = {385-398}, author = {Jette, A. and Haley, S. and Tao, W. and Ni, P. and Moed, R. and Meyers, D.
and Zurek, M.} } @article {328, title = {Psychometric evaluation and calibration of health-related quality of life item banks: plans for the Patient-Reported Outcomes Measurement Information System (PROMIS)}, journal = {Medical Care}, volume = {45}, number = {5 Suppl 1}, year = {2007}, note = {Reeve, Bryce BHays, Ron DBjorner, Jakob BCook, Karon FCrane, Paul KTeresi, Jeanne AThissen, DavidRevicki, Dennis AWeiss, David JHambleton, Ronald KLiu, HonghuGershon, RichardReise, Steven PLai, Jin-sheiCella, DavidPROMIS Cooperative GroupAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralUnited StatesMedical careMed Care. 2007 May;45(5 Suppl 1):S22-31.}, month = {May}, pages = {S22-31}, edition = {2007/04/20}, abstract = {BACKGROUND: The construction and evaluation of item banks to measure unidimensional constructs of health-related quality of life (HRQOL) is a fundamental objective of the Patient-Reported Outcomes Measurement Information System (PROMIS) project. OBJECTIVES: Item banks will be used as the foundation for developing short-form instruments and enabling computerized adaptive testing. The PROMIS Steering Committee selected 5 HRQOL domains for initial focus: physical functioning, fatigue, pain, emotional distress, and social role participation. This report provides an overview of the methods used in the PROMIS item analyses and proposed calibration of item banks. ANALYSES: Analyses include evaluation of data quality (eg, logic and range checking, spread of response distribution within an item), descriptive statistics (eg, frequencies, means), item response theory model assumptions (unidimensionality, local independence, monotonicity), model fit, differential item functioning, and item calibration for banking. RECOMMENDATIONS: Summarized are key analytic issues; recommendations are provided for future evaluations of item banks in HRQOL assessment.}, keywords = {*Health Status, *Information Systems, *Quality of Life, *Self Disclosure, Adolescent, Adult, Aged, Calibration, Databases as Topic, Evaluation Studies as Topic, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Psychometrics, Questionnaires/standards, United States}, isbn = {0025-7079 (Print)}, author = {Reeve, B. B. and Hays, R. D. and Bjorner, J. B. and Cook, K. F. and Crane, P. K. and Teresi, J. A. and Thissen, D. and Revicki, D. A. and Weiss, D. J. and Hambleton, R. K. and Liu, H. and Gershon, R. C. and Reise, S. P. and Lai, J. S. and Cella, D.} } @article {343, title = {Psychometric properties of an emotional adjustment measure: An application of the graded response model}, journal = {European Journal of Psychological Assessment}, volume = {23}, number = {1}, year = {2007}, pages = {39-46}, publisher = {Hogrefe \& Huber Publishers GmbH: Germany}, abstract = {Item response theory (IRT) provides valuable methods for the analysis of the psychometric properties of a psychological measure. However, IRT has been mainly used for assessing achievements and ability rather than personality factors. This paper presents an application of the IRT to a personality measure. Thus, the psychometric properties of a new emotional adjustment measure that consists of a 28-six graded response items is shown. Classical test theory (CTT) analyses as well as IRT analyses are carried out. Samejima{\textquoteright}s (1969) graded-response model has been used for estimating item parameters. 
Results show that the bank of items fulfills model assumptions and fits the data reasonably well, demonstrating the suitability of the IRT models for the description and use of data originating from personality measures. In this sense, the model fulfills the expectations that IRT has undoubted advantages: (1) The invariance of the estimated parameters, (2) the treatment given to the standard error of measurement, and (3) the possibilities offered for the construction of computerized adaptive tests (CAT). The bank of items shows good reliability. It also shows convergent validity compared to the Eysenck Personality Inventory (EPQ-A; Eysenck \& Eysenck, 1975) and the Big Five Questionnaire (BFQ; Caprara, Barbaranelli, \& Borgogni, 1993). (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive tests, Emotional Adjustment, Item Response Theory, Personality Measures, personnel recruitment, Psychometrics, Samejima{\textquoteright}s graded response model, test reliability, validity}, isbn = {1015-5759 (Print)}, author = {Rubio, V. J. and Aguado, D. and Hontangas, P. M. and Hern{\'a}ndez, J. M.} } @article {333, title = {Relative precision, efficiency and construct validity of different starting and stopping rules for a computerized adaptive test: The GAIN Substance Problem Scale}, journal = {Journal of Applied Measurement}, volume = {8}, number = {1}, year = {2007}, pages = {48-65}, abstract = {Substance abuse treatment programs are being pressed to measure and make clinical decisions more efficiently about an increasing array of problems. This computerized adaptive testing (CAT) simulation examined the relative efficiency, precision and construct validity of different starting and stopping rules used to shorten the Global Appraisal of Individual Needs{\textquoteright} (GAIN) Substance Problem Scale (SPS) and facilitate diagnosis based on it. Data came from 1,048 adolescents and adults referred to substance abuse treatment centers in 5 sites. CAT performance was evaluated using: (1) average standard errors, (2) average number of items, (3) bias in personmeasures, (4) root mean squared error of person measures, (5) Cohen{\textquoteright}s kappa to evaluate CAT classification compared to clinical classification, (6) correlation between CAT and full-scale measures, and (7) construct validity of CAT classification vs. clinical classification using correlations with five theoretically associated instruments. Results supported both CAT efficiency and validity.}, keywords = {My article}, author = {Riley, B. B. and Conrad, K. J. and Bezruczko, N. and Dennis, M. L.} } @article {555, title = {A review of item exposure control strategies for computerized adaptive testing developed from 1983 to 2005}, journal = {Journal of Technology,Learning, and Assessment,}, volume = { 5(8)}, year = {2007}, note = {http://www.jtla.org. {PDF file, 326 KB}}, abstract = {Since researchers acknowledged the several advantages of computerized adaptive testing (CAT) over traditional linear test administration, the issue of item exposure control has received increased attention. Due to CAT{\textquoteright}s underlying philosophy, particular items in the item pool may be presented too often and become overexposed, while other items are rarely selected by the CAT algorithm and thus become underexposed. Several item exposure control strategies have been presented in the literature aiming to prevent overexposure of some items and to increase the use rate of rarely or never selected items. 
This paper reviews such strategies that appeared in the relevant literature from 1983 to 2005. The focus of this paper is on studies that have been conducted in order to evaluate the effectiveness of item exposure control strategies for dichotomous scoring, polytomous scoring and testlet-based CAT systems. In addition, the paper discusses the strengths and weaknesses of each strategy group using examples from simulation studies. No new research is presented but rather a compendium of models is reviewed with an overall objective of providing researchers of this field, especially newcomers, a wide view of item exposure control strategies.}, author = {Georgiadou, E. and Triantafillou, E. and Economides, A. A.} } @inbook {1924, title = {The shadow-test approach: A universal framework for implementing adaptive testing}, year = {2007}, note = {{PDF file, 461 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {van der Linden, W. J.} } @inbook {1821, title = {Some thoughts on controlling item exposure in adaptive testing}, year = {2007}, note = {{PDF file, 357 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Lewis, C.} } @inbook {1926, title = {Statistical aspects of adaptive testing}, year = {2007}, address = {C. R. Rao and S. Sinharay (Eds.), Handbook of statistics (Vol. 27: Psychometrics) (pp. 801838). Amsterdam: North-Holland.}, author = {van der Linden, W. J. and Glas, C. A. W.} } @article {54, title = {A system for interactive assessment and management in palliative care}, journal = {Journal of Pain Symptom Management}, volume = {33}, number = {6}, year = {2007}, note = {Chang, Chih-HungBoni-Saenz, Alexander ADurazo-Arvizu, Ramon ADesHarnais, SusanLau, Denys TEmanuel, Linda LR21CA113191/CA/United States NCIResearch Support, N.I.H., ExtramuralReviewUnited StatesJournal of pain and symptom managementJ Pain Symptom Manage. 2007 Jun;33(6):745-55. Epub 2007 Mar 23.}, pages = {745-55}, edition = {2007/03/16}, abstract = {The availability of psychometrically sound and clinically relevant screening, diagnosis, and outcome evaluation tools is essential to high-quality palliative care assessment and management. Such data will enable us to improve patient evaluations, prognoses, and treatment selections, and to increase patient satisfaction and quality of life. To accomplish these goals, medical care needs more precise, efficient, and comprehensive tools for data acquisition, analysis, interpretation, and management. We describe a system for interactive assessment and management in palliative care (SIAM-PC), which is patient centered, model driven, database derived, evidence based, and technology assisted. The SIAM-PC is designed to reliably measure the multiple dimensions of patients{\textquoteright} needs for palliative care, and then to provide information to clinicians, patients, and the patients{\textquoteright} families to achieve optimal patient care, while improving our capacity for doing palliative care research. This system is innovative in its application of the state-of-the-science approaches, such as item response theory and computerized adaptive testing, to many of the significant clinical problems related to palliative care.}, keywords = {*Needs Assessment, Humans, Medical Informatics/*organization \& administration, Palliative Care/*organization \& administration}, isbn = {0885-3924 (Print)}, author = {Chang, C-H. and Boni-Saenz, A. A. 
and Durazo-Arvizu, R. A. and DesHarnais, S. and Lau, D. T. and Emanuel, L. L.} } @article {306, title = {Test design optimization in CAT early stage with the nominal response model}, journal = {Applied Psychological Measurement}, volume = {31}, number = {3}, year = {2007}, pages = {213-232}, publisher = {Sage Publications: US}, abstract = {The early stage of computerized adaptive testing (CAT) refers to the phase of the trait estimation during the administration of only a few items. This phase can be characterized by bias and instability of estimation. In this study, an item selection criterion is introduced in an attempt to lessen this instability: the D-optimality criterion. A polytomous unconstrained CAT simulation is carried out to evaluate this criterion{\textquoteright}s performance under different test premises. The simulation shows that the extent of early stage instability depends primarily on the quality of the item pool information and its size and secondarily on the item selection criteria. The efficiency of the D-optimality criterion is similar to the efficiency of other known item selection criteria. Yet, it often yields estimates that, at the beginning of CAT, display a more robust performance against instability. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, nominal response model, robust performance, test design optimization}, isbn = {0146-6216 (Print)}, author = {Passos, V. L. and Berger, M. P. F. and Tan, F. E.} } @article {2149, title = {Two-Phase Item Selection Procedure for Flexible Content Balancing in CAT}, journal = {Applied Psychological Measurement}, volume = {31}, number = {6}, year = {2007}, pages = {467-482}, abstract = {

Content balancing is an important issue in the design and implementation of computerized adaptive testing (CAT). Content-balancing techniques that have been applied in fixed content balancing, where the number of items from each content area is fixed, include constrained CAT (CCAT), the modified multinomial model (MMM), modified constrained CAT (MCCAT), and others. In this article, four methods are proposed to address the flexible content-balancing issue with the a-stratification design, named STR_C. The four methods are MMM+, an extension of MMM; MCCAT+, an extension of MCCAT; the TPM method, a two-phase content-balancing method using MMM in both phases; and the TPF method, a two-phase content-balancing method using MMM in the first phase and MCCAT in the second. Simulation results show that all of the methods work well in content balancing, and TPF performs the best in item exposure control and item pool utilization while maintaining measurement precision.
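As a rough illustration of the general idea behind multinomial-model content balancing (a generic Python sketch, not the STR_C, MMM+, MCCAT+, TPM, or TPF procedures proposed in this article; every function name, parameter, and item value below is invented for the example), one item-selection step might look like this:

import numpy as np

def info_2pl(a, b, theta):
    # Fisher information of a 2PL item at the current ability estimate
    p = 1.0 / (1.0 + np.exp(-a * (theta - b)))
    return a ** 2 * p * (1.0 - p)

def select_item_mmm(theta, a, b, area, target_prop, used, test_length, rng):
    # Draw a content area with probability proportional to its remaining quota,
    # then take the most informative unused item from that area.
    areas = list(target_prop)
    shortfall = np.array([max(target_prop[k] * test_length - np.sum(used & (area == k)), 0.0)
                          for k in areas])
    if shortfall.sum() > 0:
        probs = shortfall / shortfall.sum()
    else:
        probs = np.full(len(areas), 1.0 / len(areas))
    chosen = rng.choice(areas, p=probs)
    candidates = np.where((~used) & (area == chosen))[0]
    if candidates.size == 0:          # quotas met everywhere; fall back to any unused item
        candidates = np.where(~used)[0]
    return candidates[np.argmax(info_2pl(a[candidates], b[candidates], theta))]

# Invented six-item pool spanning two content areas
rng = np.random.default_rng(0)
a = np.array([1.0, 1.2, 0.8, 1.5, 0.9, 1.1])
b = np.array([-1.0, 0.0, 0.5, 1.0, -0.5, 0.2])
area = np.array(["alg", "alg", "geo", "geo", "geo", "alg"])
used = np.zeros(6, dtype=bool)
next_item = select_item_mmm(0.0, a, b, area, {"alg": 0.5, "geo": 0.5}, used, test_length=4, rng=rng)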

}, doi = {10.1177/0146621606292933}, url = {http://apm.sagepub.com/content/31/6/467.abstract}, author = {Ying Cheng, and Chang, Hua-Hua and Qing Yi,} } @article {508, title = {Two-phase item selection procedure for flexible content balancing in CAT}, journal = {Applied Psychological. Measurement}, volume = {3}, year = {2007}, pages = {467{\textendash}482}, author = {Cheng, Y and Chang, Hua-Hua and Yi, Q.} } @inbook {1894, title = {Up-and-down procedures for approximating optimal designs using person-response functions}, year = {2007}, note = {{PDF file, 1,042 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Sheng, Y. and Flournoy, N. and Osterlind, S. J.} } @inbook {1753, title = {Use of CAT in dynamic testing}, year = {2007}, note = {{PDF file, 133 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {De Beer, M.} } @proceedings {83, title = {The use of computerized adaptive testing to assess psychopathology using the Global Appraisal of Individual Needs}, journal = {American Evaluation Association}, year = {2007}, month = {November}, publisher = {American Evaluation Association }, address = {Portland, OR USA}, author = {Conrad, K. J. and Riley, B. B. and Dennis, M. L.} } @inbook {1863, title = {Validity and decision issues in selecting a CAT measurement model}, year = {2007}, note = {{PDF file, 977 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 2007 GMAC Conference on Computerized Adaptive Testing.}, author = {Olsen, J. B. and Bunderson, C. V} } @article {189, title = {Adaptive success control in computerized adaptive testing}, journal = {Psychology Science}, volume = {48}, number = {4}, year = {2006}, pages = {436-450}, publisher = {Pabst Science Publishers: Germany}, abstract = {In computerized adaptive testing (CAT) procedures within the framework of probabilistic test theory the difficulty of an item is adjusted to the ability of the respondent, with the aim of maximizing the amount of information generated per item, thereby also increasing test economy and test reasonableness. However, earlier research indicates that respondents might feel over-challenged by a constant success probability of p = 0.5 and therefore cannot come to a sufficiently high answer certainty within a reasonable timeframe. Consequently response time per item increases, which -- depending on the test material -- can outweigh the benefit of administering optimally informative items. Instead of a benefit, the result of using CAT procedures could be a loss of test economy. Based on this problem, an adaptive success control algorithm was designed and tested, adapting the success probability to the working style of the respondent. Persons who need higher answer certainty in order to come to a decision are detected and receive a higher success probability, in order to minimize the test duration (not the number of items as in classical CAT). The method is validated on the re-analysis of data from the Adaptive Matrices Test (AMT, Hornke, Etzel \& Rettig, 1999) and by the comparison between an AMT version using classical CAT and an experimental version using Adaptive Success Control. The results are discussed in the light of psychometric and psychological aspects of test quality. 
(PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {adaptive success control, computerized adaptive testing, Psychometrics}, isbn = {0033-3018 (Print)}, author = {H{\"a}usler, Joachim} } @article {310, title = {Applying Bayesian item selection approaches to adaptive tests using polytomous items}, journal = {Applied Measurement in Education}, volume = {19}, number = {1}, year = {2006}, pages = {1-20}, publisher = {Lawrence Erlbaum: US}, abstract = {This study applied the maximum expected information (MEI) and the maximum posterior- weighted information (MPI) approaches of computer adaptive testing item selection to the case of a test using polytomous items following the partial credit model. The MEI and MPI approaches are described. A simulation study compared the efficiency of ability estimation using the MEI and MPI approaches to the traditional maximal item information (MII) approach. The results of the simulation study indicated that the MEI and MPI approaches led to a superior efficiency of ability estimation compared with the MII approach. The superiority of the MEI and MPI approaches over the MII approach was greatest when the bank contained items having a relatively peaked information function. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {adaptive tests, Bayesian item selection, computer adaptive testing, maximum expected information, polytomous items, posterior weighted information}, isbn = {0895-7347 (Print); 1532-4818 (Electronic)}, author = {Penfield, R. D.} } @article {401, title = {Assembling a computerized adaptive testing item pool as a set of linear tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {31}, number = {1}, year = {2006}, pages = {81-99}, publisher = {Sage Publications: US}, abstract = {Test-item writing efforts typically results in item pools with an undesirable correlational structure between the content attributes of the items and their statistical information. If such pools are used in computerized adaptive testing (CAT), the algorithm may be forced to select items with less than optimal information, that violate the content constraints, and/or have unfavorable exposure rates. Although at first sight somewhat counterintuitive, it is shown that if the CAT pool is assembled as a set of linear test forms, undesirable correlations can be broken down effectively. It is proposed to assemble such pools using a mixed integer programming model with constraints that guarantee that each test meets all content specifications and an objective function that requires them to have maximal information at a well-chosen set of ability values. An empirical example with a previous master pool from the Law School Admission Test (LSAT) yielded a CAT with nearly uniform bias and mean-squared error functions for the ability estimator and item-exposure rates that satisfied the target for all items in the pool. }, keywords = {Algorithms, computerized adaptive testing, item pool, linear tests, mathematical models, statistics, Test Construction, Test Items}, isbn = {1076-9986 (Print)}, author = {van der Linden, W. J. and Ariel, A. and Veldkamp, B. P.} } @article {820, title = {Assessing CAT Test Security Severity}, journal = {Applied Psychological Measurement}, volume = {30(1)}, year = {2006}, pages = {62{\textendash}63}, author = {Yi, Q., Zhang, J. 
and Chang, Hua-Hua} } @booklet {1406, title = {A CAT with personality and attitude}, year = {2006}, note = {$\#$HO06-01 .}, address = {Enschede, The Netherlands: PrintPartners Ipskamp B}, author = {Hol, A. M.} } @article {247, title = {Comparing methods of assessing differential item functioning in a computerized adaptive testing environment}, journal = {Journal of Educational Measurement}, volume = {43}, number = {3}, year = {2006}, pages = {245-264}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {Mantel-Haenszel and SIBTEST, which have known difficulty in detecting non-unidirectional differential item functioning (DIF), have been adapted with some success for computerized adaptive testing (CAT). This study adapts logistic regression (LR) and the item-response-theory-likelihood-ratio test (IRT-LRT), capable of detecting both unidirectional and non-unidirectional DIF, to the CAT environment in which pretest items are assumed to be seeded in CATs but not used for trait estimation. The proposed adaptation methods were evaluated with simulated data under different sample size ratios and impact conditions in terms of Type I error, power, and specificity in identifying the form of DIF. The adapted LR and IRT-LRT procedures are more powerful than the CAT version of SIBTEST for non-unidirectional DIF detection. The good Type I error control provided by IRT-LRT under extremely unequal sample sizes and large impact is encouraging. Implications of these and other findings are discussed. all rights reserved)}, keywords = {computerized adaptive testing, educational testing, item response theory likelihood ratio test, logistic regression, trait estimation, unidirectional \& non-unidirectional differential item functioning}, isbn = {0022-0655 (Print)}, author = {Lei, P-W. and Chen, S-Y. and Yu, L.} } @article {2198, title = {Comparing Methods of Assessing Differential Item Functioning in a Computerized Adaptive Testing Environment}, journal = {Journal of Educational Measurement}, volume = {43}, number = {3}, year = {2006}, pages = {245{\textendash}264}, abstract = {

Mantel-Haenszel and SIBTEST, which have known difficulty in detecting non-unidirectional differential item functioning (DIF), have been adapted with some success for computerized adaptive testing (CAT). This study adapts logistic regression (LR) and the item-response-theory-likelihood-ratio test (IRT-LRT), capable of detecting both unidirectional and non-unidirectional DIF, to the CAT environment in which pretest items are assumed to be seeded in CATs but not used for trait estimation. The proposed adaptation methods were evaluated with simulated data under different sample size ratios and impact conditions in terms of Type I error, power, and specificity in identifying the form of DIF. The adapted LR and IRT-LRT procedures are more powerful than the CAT version of SIBTEST for non-unidirectional DIF detection. The good Type I error control provided by IRT-LRT under extremely unequal sample sizes and large impact is encouraging. Implications of these and other findings are discussed.
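For readers unfamiliar with the logistic regression (LR) DIF approach referenced here, the following generic Python sketch illustrates the standard nested-model likelihood-ratio comparison for a single studied item, with an ability estimate as the matching variable; it is not the CAT-specific adaptation evaluated in this article, and the data and function names are fabricated for the example:

import numpy as np
from scipy.optimize import minimize
from scipy.stats import chi2

def fit_logistic_loglik(X, y):
    # Maximum log-likelihood of a plain (unpenalized) logistic regression
    def negloglik(beta):
        eta = X @ beta
        return -np.sum(y * eta - np.logaddexp(0.0, eta))
    res = minimize(negloglik, np.zeros(X.shape[1]), method="BFGS")
    return -res.fun

def lr_dif_test(theta_hat, group, item_resp):
    # Compact model: intercept + matching variable (theta_hat)
    # Augmented model: adds group and group x theta_hat (uniform and non-uniform DIF)
    ones = np.ones(len(item_resp))
    X0 = np.column_stack([ones, theta_hat])
    X1 = np.column_stack([ones, theta_hat, group, group * theta_hat])
    g2 = 2.0 * (fit_logistic_loglik(X1, item_resp) - fit_logistic_loglik(X0, item_resp))
    return g2, chi2.sf(g2, df=2)      # likelihood-ratio statistic and p-value, df = 2

# Fabricated data with uniform DIF built in for the focal group
rng = np.random.default_rng(1)
theta_hat = rng.normal(size=500)
group = rng.integers(0, 2, size=500)           # 0 = reference, 1 = focal
p_true = 1.0 / (1.0 + np.exp(-(0.8 * theta_hat - 0.5 * group)))
item_resp = rng.binomial(1, p_true)
g2, p_value = lr_dif_test(theta_hat, group, item_resp)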

}, issn = {1745-3984}, doi = {10.1111/j.1745-3984.2006.00015.x}, url = {http://dx.doi.org/10.1111/j.1745-3984.2006.00015.x}, author = {Lei, Pui-Wa and Chen, Shu-Ying and Yu, Lan} } @article {164, title = {The comparison among item selection strategies of CAT with multiple-choice items}, journal = {Acta Psychologica Sinica}, volume = {38}, number = {5}, year = {2006}, pages = {778-783}, publisher = {Science Press: China}, abstract = {The initial purpose of comparing item selection strategies for CAT was to increase the efficiency of tests. As studies continued, however, it was found that increasing the efficiency of item bank using was also an important goal of comparing item selection strategies. These two goals often conflicted. The key solution was to find a strategy with which both goals could be accomplished. The item selection strategies for graded response model in this study included: the average of the difficulty orders matching with the ability; the medium of the difficulty orders matching with the ability; maximum information; A stratified (average); and A stratified (medium). The evaluation indexes used for comparison included: the bias of ability estimates for the true; the standard error of ability estimates; the average items which the examinees have administered; the standard deviation of the frequency of items selected; and sum of the indices weighted. Using the Monte Carlo simulation method, we obtained some data and computer iterated the data 20 times each under the conditions that the item difficulty parameters followed the normal distribution and even distribution. The results were as follows; The results indicated that no matter difficulty parameters followed the normal distribution or even distribution. Every type of item selection strategies designed in this research had its strong and weak points. In general evaluation, under the condition that items were stratified appropriately, A stratified (medium) (ASM) had the best effect. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {CAT, computerized adaptive testing, graded response model, item selection strategies, multiple choice items}, isbn = {0439-755X (Print)}, author = {Hai-qi, D. and De-zhi, C. and Shuliang, D. and Taiping, D.} } @conference {1068, title = {A comparison of online calibration methods for a CAT}, booktitle = {Presented at the National Council on Measurement on Education}, year = {2006}, address = {San Francisco, CA}, author = {Morgan, D. L. and Way, W. D. and Augemberg, K.E.} } @article {2216, title = {Comparison of the Psychometric Properties of Several Computer-Based Test Designs for Credentialing Exams With Multiple Purposes}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {203-220}, doi = {10.1207/s15324818ame1903_3}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_3}, author = {Jodoin, Michael G. and Zenisky, April and Hambleton, Ronald K.} } @article {165, title = {Computer adaptive testing improved accuracy and precision of scores over random item selection in a physical functioning item bank}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {11}, year = {2006}, pages = {1174-1182}, isbn = {08954356}, author = {Haley, S. and Ni, P. and Hambleton, R. K. and Slavin, M. 
and Jette, A.} } @article {172, title = {Computer adaptive testing improved accuracy and precision of scores over random item selection in a physical functioning item bank}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {11}, year = {2006}, note = {Haley, Stephen MNi, PengshengHambleton, Ronald KSlavin, Mary DJette, Alan MK02 hd45354-01/hd/nichdR01 hd043568/hd/nichdComparative StudyResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.EnglandJournal of clinical epidemiologyJ Clin Epidemiol. 2006 Nov;59(11):1174-82. Epub 2006 Jul 11.}, month = {Nov}, pages = {1174-82}, edition = {2006/10/10}, abstract = {BACKGROUND AND OBJECTIVE: Measuring physical functioning (PF) within and across postacute settings is critical for monitoring outcomes of rehabilitation; however, most current instruments lack sufficient breadth and feasibility for widespread use. Computer adaptive testing (CAT), in which item selection is tailored to the individual patient, holds promise for reducing response burden, yet maintaining measurement precision. We calibrated a PF item bank via item response theory (IRT), administered items with a post hoc CAT design, and determined whether CAT would improve accuracy and precision of score estimates over random item selection. METHODS: 1,041 adults were interviewed during postacute care rehabilitation episodes in either hospital or community settings. Responses for 124 PF items were calibrated using IRT methods to create a PF item bank. We examined the accuracy and precision of CAT-based scores compared to a random selection of items. RESULTS: CAT-based scores had higher correlations with the IRT-criterion scores, especially with short tests, and resulted in narrower confidence intervals than scores based on a random selection of items; gains, as expected, were especially large for low and high performing adults. CONCLUSION: The CAT design may have important precision and efficiency advantages for point-of-care functional assessment in rehabilitation practice settings.}, keywords = {*Recovery of Function, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Rehabilitation/*standards, Reproducibility of Results, Software}, isbn = {0895-4356 (Print)}, author = {Haley, S. M. and Ni, P. and Hambleton, R. K. and Slavin, M. D. and Jette, A. M.} } @inbook {109, title = {Computer-based testing}, booktitle = {Handbook of multimethod measurement in psychology}, volume = {xiv}, year = {2006}, note = {Using Smart Source ParsingHandbook of multimethod measurement in psychology. (pp. 87-100). Washington, DC : American Psychological Association, [URL:http://www.apa.org/books]. xiv, 553 pp}, pages = {87-100}, publisher = {American Psychological Association}, organization = {American Psychological Association}, address = {Washington D.C. USA}, abstract = {(From the chapter) There has been a proliferation of research designed to explore and exploit opportunities provided by computer-based assessment. This chapter provides an overview of the diverse efforts by researchers in this area. It begins by describing how paper-and-pencil tests can be adapted for administration by computers. Computerization provides the important advantage that items can be selected so they are of appropriate difficulty for each examinee. 
Some of the psychometric theory needed for computerized adaptive testing is reviewed. Then research on innovative computerized assessments is summarized. These assessments go beyond multiple-choice items by using formats made possible by computerization. Then some hardware and software issues are described, and finally, directions for future work are outlined. (PsycINFO Database Record (c) 2006 APA )}, keywords = {Adaptive Testing computerized adaptive testing, Computer Assisted Testing, Experimentation, Psychometrics, Theories}, author = {F Drasgow and Chuah, S. C.} } @article {176, title = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: I. Activity outcomes}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {87}, number = {8}, year = {2006}, note = {Haley, Stephen MSiebens, HilaryCoster, Wendy JTao, WeiBlack-Schaffer, Randie MGandek, BarbaraSinclair, Samuel JNi, PengshengK0245354-01/phsR01 hd043568/hd/nichdResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2006 Aug;87(8):1033-42.}, month = {Aug}, pages = {1033-42}, edition = {2006/08/01}, abstract = {OBJECTIVE: To examine score agreement, precision, validity, efficiency, and responsiveness of a computerized adaptive testing (CAT) version of the Activity Measure for Post-Acute Care (AM-PAC-CAT) in a prospective, 3-month follow-up sample of inpatient rehabilitation patients recently discharged home. DESIGN: Longitudinal, prospective 1-group cohort study of patients followed approximately 2 weeks after hospital discharge and then 3 months after the initial home visit. SETTING: Follow-up visits conducted in patients{\textquoteright} home setting. PARTICIPANTS: Ninety-four adults who were recently discharged from inpatient rehabilitation, with diagnoses of neurologic, orthopedic, and medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from AM-PAC-CAT, including 3 activity domains of movement and physical, personal care and instrumental, and applied cognition were compared with scores from a traditional fixed-length version of the AM-PAC with 66 items (AM-PAC-66). RESULTS: AM-PAC-CAT scores were in good agreement (intraclass correlation coefficient model 3,1 range, .77-.86) with scores from the AM-PAC-66. On average, the CAT programs required 43\% of the time and 33\% of the items compared with the AM-PAC-66. Both formats discriminated across functional severity groups. The standardized response mean (SRM) was greater for the movement and physical fixed form than the CAT; the effect size and SRM of the 2 other AM-PAC domains showed similar sensitivity between CAT and fixed formats. Using patients{\textquoteright} own report as an anchor-based measure of change, the CAT and fixed length formats were comparable in responsiveness to patient-reported change over a 3-month interval. 
CONCLUSIONS: Accurate estimates for functional activity group-level changes can be obtained from CAT administrations, with a considerable reduction in administration time.}, keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Siebens, H. and Coster, W. J. and Tao, W. and Black-Schaffer, R. M. and Gandek, B. and Sinclair, S. J. and Ni, P.} } @article {352, title = {Computerized adaptive testing of diabetes impact: a feasibility study of Hispanics and non-Hispanics in an active clinic population}, journal = {Quality of Life Research}, volume = {15}, number = {9}, year = {2006}, note = {Schwartz, CarolynWelch, GarrySantiago-Kelley, PaulaBode, RitaSun, Xiaowu1 r43 dk066874-01/dk/niddkResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Nov;15(9):1503-18. Epub 2006 Sep 26.}, month = {Nov}, pages = {1503-18}, edition = {2006/10/13}, abstract = {BACKGROUND: Diabetes is a leading cause of death and disability in the US and is twice as common among Hispanic Americans as non-Hispanics. The societal costs of diabetes provide an impetus for developing tools that can improve patient care and delay or prevent diabetes complications. METHODS: We implemented a feasibility study of a Computerized Adaptive Test (CAT) to measure diabetes impact using a sample of 103 English- and 97 Spanish-speaking patients (mean age = 56.5, 66.5\% female) in a community medical center with a high proportion of minority patients (28\% African-American). The 37 items of the Diabetes Impact Survey were translated using forward-backward translation and cognitive debriefing. Participants were randomized to receive either the full-length tool or the Diabetes-CAT first, in the patient{\textquoteright}s native language. RESULTS: The number of items and the amount of time to complete the survey for the CAT was reduced to one-sixth the amount for the full-length tool in both languages, across disease severity. Confirmatory Factor Analysis confirmed that the Diabetes Impact Survey is unidimensional. The Diabetes-CAT demonstrated acceptable internal consistency reliability, construct validity, and discriminant validity in the overall sample, although subgroup analyses suggested that the English sample data evidenced higher levels of reliability and validity than the Spanish sample and issues with discriminant validity in the Spanish sample. Differential Item Function analysis revealed differences in responses tendencies by language group in 3 of the 37 items. Participant interviews suggested that the Spanish-speaking patients generally preferred the paper survey to the computer-assisted tool, and were twice as likely to experience difficulties understanding the items. 
CONCLUSIONS: While the Diabetes-CAT demonstrated clear advantages in reducing respondent burden as compared to the full-length tool, simplifying the item bank will be necessary for enhancing the feasibility of the Diabetes-CAT for use with low literacy patients.}, keywords = {*Computers, *Hispanic Americans, *Quality of Life, Adult, Aged, Data Collection/*methods, Diabetes Mellitus/*psychology, Feasibility Studies, Female, Humans, Language, Male, Middle Aged}, isbn = {0962-9343 (Print)}, author = {Schwartz, C. and Welch, G. and Santiago-Kelley, P. and Bode, R. and Sun, X.} } @article {2132, title = {Computerized adaptive testing under nonparametric IRT models}, journal = {Psychometrika}, volume = {71}, year = {2006}, pages = {121-137}, author = {Xu, X. and Douglas, J.} } @conference {884, title = {Constraints-weighted information method for item selection of severely constrained computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2006}, address = {San Francisco}, author = {Cheng, Y and Chang, Hua-Hua and Wang, X. B.} } @inbook {1752, title = {Designing computerized adaptive tests}, year = {2006}, address = {S.M. Downing and T. M. Haladyna (Eds.), Handbook of test development. New Jersey: Lawrence Erlbaum Associates.}, author = {Davey, T. and Pitoniak, M. J.} } @article {2173, title = {Effects of Estimation Bias on Multiple-Category Classification With an IRT-Based Adaptive Classification Procedure}, journal = {Educational and Psychological Measurement}, volume = {66}, number = {4}, year = {2006}, pages = {545-564}, abstract = {

The effects of five ability estimators, that is, maximum likelihood estimator, weighted likelihood estimator, maximum a posteriori, expected a posteriori, and Owen{\textquoteright}s sequential estimator, on the performances of the item response theory-based adaptive classification procedure on multiple categories were studied via simulations. The following results were found. (a) The Bayesian estimators were more likely to misclassify examinees into an inward category because of their inward biases, when a fixed start value of zero was assigned to every examinee. (b) When moderately accurate start values were available, however, Bayesian estimators produced classifications that were slightly more accurate than was the maximum likelihood estimator or weighted likelihood estimator. Expected a posteriori was the procedure that produced the most accurate results among the three Bayesian methods. (c) All five estimators produced equivalent efficiencies in terms of number of items required, which was 50 or more items except for abilities that were less than -2.00 or greater than 2.00.
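As a concrete illustration of one of the estimators compared here, the following Python sketch computes an expected a posteriori (EAP) ability estimate for dichotomous 2PL responses on a quadrature grid and applies fixed cut scores for a multiple-category classification; it is a generic sketch under assumed parameter values and invented names, not the simulation design of the study:

import numpy as np

def eap_estimate(responses, a, b, grid=None, prior_sd=1.0):
    # Expected a posteriori ability estimate for dichotomous 2PL responses,
    # evaluated on a fixed quadrature grid with a normal prior.
    if grid is None:
        grid = np.linspace(-4.0, 4.0, 81)
    prior = np.exp(-0.5 * (grid / prior_sd) ** 2)
    p = 1.0 / (1.0 + np.exp(-a[:, None] * (grid[None, :] - b[:, None])))
    lik = np.prod(np.where(responses[:, None] == 1, p, 1.0 - p), axis=0)
    post = prior * lik
    post /= post.sum()
    theta_eap = np.sum(grid * post)
    psd = np.sqrt(np.sum((grid - theta_eap) ** 2 * post))   # posterior standard deviation
    return theta_eap, psd

def classify(theta_hat, cuts=(-1.0, 0.0, 1.0)):
    # Assign one of len(cuts) + 1 ordered categories using fixed cut scores
    return int(np.searchsorted(cuts, theta_hat))

# Invented example: three administered items and their scored responses
a = np.array([1.2, 0.9, 1.5])
b = np.array([-0.5, 0.3, 0.8])
responses = np.array([1, 1, 0])
theta_hat, psd = eap_estimate(responses, a, b)
category = classify(theta_hat)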

}, doi = {10.1177/0013164405284031}, url = {http://epm.sagepub.com/content/66/4/545.abstract}, author = {Yang, Xiangdong and Poggio, John C. and Glasnapp, Douglas R.} } @article {399, title = {Equating scores from adaptive to linear tests}, journal = {Applied Psychological Measurement}, volume = {30}, number = {6}, year = {2006}, pages = {493-508}, publisher = {Sage Publications: US}, abstract = {Two local methods for observed-score equating are applied to the problem of equating an adaptive test to a linear test. In an empirical study, the methods were evaluated against a method based on the test characteristic function (TCF) of the linear test and traditional equipercentile equating applied to the ability estimates on the adaptive test for a population of test takers. The two local methods were generally best. Surprisingly, the TCF method performed slightly worse than the equipercentile method. Both methods showed strong bias and uniformly large inaccuracy, but the TCF method suffered from extra error due to the lower asymptote of the test characteristic function. It is argued that the worse performances of the two methods are a consequence of the fact that they use a single equating transformation for an entire population of test takers and therefore have to compromise between the individual score distributions. }, keywords = {computerized adaptive testing, equipercentile equating, local equating, score reporting, test characteristic function}, isbn = {0146-6216 (Print)}, author = {van der Linden, W. J.} } @article {246, title = {Estimation of an examinee{\textquoteright}s ability in the web-based computerized adaptive testing program IRT-CAT}, journal = {J Educ Eval Health Prof}, volume = {3}, year = {2006}, note = {Lee, Yoon-HwanPark, Jung-HoPark, In-YongKorea (South)Journal of educational evaluation for health professionsJ Educ Eval Health Prof. 2006;3:4. Epub 2006 Nov 22.}, pages = {4}, edition = {2006/01/01}, abstract = {We developed a program to estimate an examinee s ability in order to provide freely available access to a web-based computerized adaptive testing (CAT) program. We used PHP and Java Script as the program languages, PostgresSQL as the database management system on an Apache web server and Linux as the operating system. A system which allows for user input and searching within inputted items and creates tests was constructed. We performed an ability estimation on each test based on a Rasch model and 2- or 3-parametric logistic models. Our system provides an algorithm for a web-based CAT, replacing previous personal computer-based ones, and makes it possible to estimate an examinee{\textquoteright}s ability immediately at the end of test.}, isbn = {1975-5937 (Electronic)}, author = {Lee, Y. H. and Park, J. H. and Park, I. Y.} } @article {233, title = {An evaluation of a patient-reported outcomes found computerized adaptive testing was efficient in assessing osteoarthritis impact}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {7}, year = {2006}, pages = {715-723}, abstract = {BACKGROUND AND OBJECTIVES: Evaluate a patient-reported outcomes questionnaire that uses computerized adaptive testing (CAT) to measure the impact of osteoarthritis (OA) on functioning and well-being. MATERIALS AND METHODS: OA patients completed 37 questions about the impact of OA on physical, social and role functioning, emotional well-being, and vitality. 
Questionnaire responses were calibrated and scored using item response theory, and two scores were estimated: a Total-OA score based on patients{\textquoteright} responses to all 37 questions, and a simulated CAT-OA score where the computer selected and scored the five most informative questions for each patient. Agreement between Total-OA and CAT-OA scores was assessed using correlations. Discriminant validity of Total-OA and CAT-OA scores was assessed with analysis of variance. Criterion measures included OA pain and severity, patient global assessment, and missed work days. RESULTS: Simulated CAT-OA and Total-OA scores correlated highly (r = 0.96). Both Total-OA and simulated CAT-OA scores discriminated significantly between patients differing on the criterion measures. F-statistics across criterion measures ranged from 39.0 (P < .001) to 225.1 (P < .001) for the Total-OA score, and from 40.5 (P < .001) to 221.5 (P < .001) for the simulated CAT-OA score. CONCLUSIONS: CAT methods produce valid and precise estimates of the impact of OA on functioning and well-being with significant reduction in response burden.}, isbn = {08954356}, author = {Kosinski, M. and Bjorner, J. and Warejr, J. and Sullivan, E. and Straus, W.} } @article {554, title = {Evaluation parameters for computer adaptive testing}, journal = {British Journal of Educational Technology}, volume = {Vol. 37}, year = {2006}, pages = {261-278}, author = {Georgiadou, E. and Triantafillou, E. and Economides, A. A.} } @article {35, title = {Expansion of a physical function item bank and development of an abbreviated form for clinical research}, journal = {Journal of Applied Measurement}, volume = {7}, number = {1}, year = {2006}, pages = {1-15}, publisher = {Richard M Smith: US}, abstract = {We expanded an existing 33-item physical function (PF) item bank with a sufficient number of items to enable computerized adaptive testing (CAT). Ten items were written to expand the bank and the new item pool was administered to 295 people with cancer. For this analysis of the new pool, seven poorly performing items were identified for further examination. This resulted in a bank with items that define an essentially unidimensional PF construct, cover a wide range of that construct, reliably measure the PF of persons with cancer, and distinguish differences in self-reported functional performance levels. We also developed a 5-item (static) assessment form ("BriefPF") that can be used in clinical research to express scores on the same metric as the overall bank. The BriefPF was compared to the PF-10 from the Medical Outcomes Study SF-36. Both short forms significantly differentiated persons across functional performance levels. While the entire bank was more precise across the PF continuum than either short form, there were differences in the area of the continuum in which each short form was more precise: the BriefPF was more precise than the PF-10 at the lower functional levels and the PF-10 was more precise than the BriefPF at the higher levels. Future research on this bank will include the development of a CAT version, the PF-CAT. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {clinical research, computerized adaptive testing, performance levels, physical function item bank, Psychometrics, test reliability, Test Validity}, isbn = {1529-7713 (Print)}, author = {Bode, R. K. and Lai, J-S. and Dineen, K. and Heinemann, A. W. and Shevrin, D. and Von Roenn, J. 
and Cella, D.} } @article {237, title = {Factor analysis techniques for assessing sufficient unidimensionality of cancer related fatigue}, journal = {Quality of Life Research}, volume = {15}, number = {7}, year = {2006}, note = {0962-9343 (Print)Journal ArticleResearch Support, N.I.H., Extramural}, month = {Sep}, pages = {1179-90}, abstract = {BACKGROUND: Fatigue is the most common unrelieved symptom experienced by people with cancer. The purpose of this study was to examine whether cancer-related fatigue (CRF) can be summarized using a single score, that is, whether CRF is sufficiently unidimensional for measurement approaches that require or assume unidimensionality. We evaluated this question using factor analysis techniques including the theory-driven bi-factor model. METHODS: Five hundred and fifty five cancer patients from the Chicago metropolitan area completed a 72-item fatigue item bank, covering a range of fatigue-related concerns including intensity, frequency and interference with physical, mental, and social activities. Dimensionality was assessed using exploratory and confirmatory factor analysis (CFA) techniques. RESULTS: Exploratory factor analysis (EFA) techniques identified from 1 to 17 factors. The bi-factor model suggested that CRF was sufficiently unidimensional. CONCLUSIONS: CRF can be considered sufficiently unidimensional for applications that require unidimensionality. One such application, item response theory (IRT), will facilitate the development of short-form and computer-adaptive testing. This may further enable practical and accurate clinical assessment of CRF.}, keywords = {*Factor Analysis, Statistical, *Quality of Life, Aged, Chicago, Fatigue/*etiology, Female, Humans, Male, Middle Aged, Neoplasms/*complications, Questionnaires}, author = {Lai, J-S. and Crane, P. K. and Cella, D.} } @article {2145, title = {A Feedback Control Strategy for Enhancing Item Selection Efficiency in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {30}, number = {2}, year = {2006}, pages = {84-99}, abstract = {

A computerized adaptive test (CAT) may be modeled as a closed-loop system, where item selection is influenced by trait level (θ) estimation and vice versa. When discrepancies exist between an examinee{\textquoteright}s estimated and true θ levels, nonoptimal item selection is a likely result. Nevertheless, examinee response behavior consistent with optimal item selection can be predicted using item response theory (IRT), without knowledge of an examinee{\textquoteright}s true θ level, yielding a specific reference point for applying an internal correcting or feedback control mechanism. Incorporating such a mechanism in a CAT is shown to be an effective strategy for increasing item selection efficiency. Results from simulation studies using maximum likelihood (ML) and modal a posteriori (MAP) trait-level estimation and Fisher information (FI) and Fisher interval information (FII) item selection are provided.
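As background for the closed-loop description above, the following Python sketch shows plain maximum Fisher-information (FI) item selection at the provisional trait estimate under a 2PL model; the feedback-control correction itself is not reproduced here, and the pool values and function names are illustrative assumptions:

import numpy as np

def fisher_info_2pl(a, b, theta):
    # Item information I(theta) = a^2 * P * (1 - P) under the 2PL model
    p = 1.0 / (1.0 + np.exp(-a * (theta - b)))
    return a ** 2 * p * (1.0 - p)

def next_item_fi(theta_hat, a, b, used):
    # Select the most informative unused item at the provisional trait estimate
    info = fisher_info_2pl(a, b, theta_hat)
    info[used] = -np.inf                  # never re-administer an item
    return int(np.argmax(info))

# Invented five-item pool with two items already administered
a = np.array([0.8, 1.1, 1.4, 0.9, 1.3])
b = np.array([-1.2, -0.3, 0.1, 0.7, 1.5])
used = np.array([True, False, True, False, False])
item = next_item_fi(0.25, a, b, used)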

}, doi = {10.1177/0146621605282774}, url = {http://apm.sagepub.com/content/30/2/84.abstract}, author = {Weissman, Alexander} } @article {2217, title = {How Big Is Big Enough? Sample Size Requirements for CAST Item Parameter Estimation}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {241-255}, doi = {10.1207/s15324818ame1903_5}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_5}, author = {Chuah, Siang Chee and F Drasgow and Luecht, Richard} } @article {2214, title = {An Introduction to Multistage Testing}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {185-187}, doi = {10.1207/s15324818ame1903_1}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_1}, author = {Alan D Mead} } @article {162, title = {Item banks and their potential applications to health status assessment in diverse populations}, journal = {Medical Care}, volume = {44}, number = {11 Suppl 3}, year = {2006}, note = {0025-7079 (Print)Journal ArticleResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}t}, month = {Nov}, pages = {S189-S197}, abstract = {In the context of an ethnically diverse, aging society, attention is increasingly turning to health-related quality of life measurement to evaluate healthcare and treatment options for chronic diseases. When evaluating and treating symptoms and concerns such as fatigue, pain, or physical function, reliable and accurate assessment is a priority. Modern psychometric methods have enabled us to move from long, static tests that provide inefficient and often inaccurate assessment of individual patients, to computerized adaptive tests (CATs) that can precisely measure individuals on health domains of interest. These modern methods, collectively referred to as item response theory (IRT), can produce calibrated "item banks" from larger pools of questions. From these banks, CATs can be conducted on individuals to produce their scores on selected domains. Item banks allow for comparison of patients across different question sets because the patient{\textquoteright}s score is expressed on a common scale. Other advantages of using item banks include flexibility in terms of the degree of precision desired; interval measurement properties under most circumstances; realistic capability for accurate individual assessment over time (using CAT); and measurement equivalence across different patient populations. This work summarizes the process used in the creation and evaluation of item banks and reviews their potential contributions and limitations regarding outcome assessment and patient care, particularly when they are applied across people of different cultural backgrounds.}, author = {Hahn, E. A. and Cella, D. and Bode, R. K. and Gershon, R. C. and Lai, J. S.} } @article {314, title = {[Item Selection Strategies of Computerized Adaptive Testing based on Graded Response Model.]}, journal = {Acta Psychologica Sinica}, volume = {38}, number = {3}, year = {2006}, pages = {461-467}, publisher = {Science Press: China}, abstract = {Item selection strategy (ISS) is an important component of Computerized Adaptive Testing (CAT). Its performance directly affects the security, efficiency and precision of the test. Thus, ISS becomes one of the central issues in CATs based on the Graded Response Model (GRM). 
It is well known that the goal of IIS is to administer the next unused item remaining in the item bank that best fits the examinees current ability estimate. In dichotomous IRT models, every item has only one difficulty parameter and the item whose difficulty matches the examinee{\textquoteright}s current ability estimate is considered to be the best fitting item. However, in GRM, each item has more than two ordered categories and has no single value to represent the item difficulty. Consequently, some researchers have used to employ the average or the median difficulty value across categories as the difficulty estimate for the item. Using the average value and the median value in effect introduced two corresponding ISSs. In this study, we used computer simulation compare four ISSs based on GRM. We also discussed the effect of "shadow pool" on the uniformity of pool usage as well as the influence of different item parameter distributions and different ability estimation methods on the evaluation criteria of CAT. In the simulation process, Monte Carlo method was adopted to simulate the entire CAT process; 1,000 examinees drawn from standard normal distribution and four 1,000-sized item pools of different item parameter distributions were also simulated. The assumption of the simulation is that a polytomous item is comprised of six ordered categories. In addition, ability estimates were derived using two methods. They were expected a posteriori Bayesian (EAP) and maximum likelihood estimation (MLE). In MLE, the Newton-Raphson iteration method and the Fisher Score iteration method were employed, respectively, to solve the likelihood equation. Moreover, the CAT process was simulated with each examinee 30 times to eliminate random error. The IISs were evaluated by four indices usually used in CAT from four aspects--the accuracy of ability estimation, the stability of IIS, the usage of item pool, and the test efficiency. Simulation results showed adequate evaluation of the ISS that matched the estimate of an examinee{\textquoteright}s current trait level with the difficulty values across categories. Setting "shadow pool" in ISS was able to improve the uniformity of pool utilization. Finally, different distributions of the item parameter and different ability estimation methods affected the evaluation indices of CAT. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, item selection strategy}, isbn = {0439-755X (Print)}, author = {Ping, Chen and Shuliang, Ding and Haijing, Lin and Jie, Zhou} } @article {2087, title = {Kernel-smoothed DIF detection procedure for computerized adaptive tests (Computerized testing report 00-08)}, year = {2006}, institution = {Law School Admission Council}, address = {Newton, PA}, author = {Nandakumar, R. and Banks, J. C. and Roussos, L. A.} } @article {16, title = {Maximum information stratification method for controlling item exposure in computerized adaptive testing}, journal = {Psicothema}, volume = {18}, number = {1}, year = {2006}, note = {Barrada, Juan RamonMazuela, PalomaOlea, JulioResearch Support, Non-U.S. Gov{\textquoteright}tSpainPsicothemaPsicothema. 
2006 Feb;18(1):156-9.}, month = {Feb}, pages = {156-159}, edition = {2007/02/14}, abstract = {The proposal for increasing the security in Computerized Adaptive Tests that has received most attention in recent years is the a-stratified method (AS - Chang and Ying, 1999): at the beginning of the test only items with low discrimination parameters (a) can be administered, with the values of the a parameters increasing as the test goes on. With this method, distribution of the exposure rates of the items is less skewed, while efficiency is maintained in trait-level estimation. The pseudo-guessing parameter (c), present in the three-parameter logistic model, is considered irrelevant, and is not used in the AS method. The Maximum Information Stratified (MIS) model incorporates the c parameter in the stratification of the bank and in the item-selection rule, improving accuracy by comparison with the AS, for item banks with a and b parameters correlated and uncorrelated. For both kinds of banks, the blocking b methods (Chang, Qian and Ying, 2001) improve the security of the item bank.M{\'e}todo de estratificaci{\'o}n por m{\'a}xima informaci{\'o}n para el control de la exposici{\'o}n en tests adaptativos informatizados. La propuesta para aumentar la seguridad en los tests adaptativos informatizados que ha recibido m{\'a}s atenci{\'o}n en los {\'u}ltimos a{\~n}os ha sido el m{\'e}todo a-estratificado (AE - Chang y Ying, 1999): en los momentos iniciales del test s{\'o}lo pueden administrarse {\'\i}tems con bajos par{\'a}metros de discriminaci{\'o}n (a), increment{\'a}ndose los valores del par{\'a}metro a admisibles seg{\'u}n avanza el test. Con este m{\'e}todo la distribuci{\'o}n de las tasas de exposici{\'o}n de los {\'\i}tems es m{\'a}s equilibrada, manteniendo una adecuada precisi{\'o}n en la medida. El par{\'a}metro de pseudoadivinaci{\'o}n (c), presente en el modelo log{\'\i}stico de tres par{\'a}metros, se supone irrelevante y no se incorpora en el AE. El m{\'e}todo de Estratificaci{\'o}n por M{\'a}xima Informaci{\'o}n (EMI) incorpora el par{\'a}metro c a la estratificaci{\'o}n del banco y a la regla de selecci{\'o}n de {\'\i}tems, mejorando la precisi{\'o}n en comparaci{\'o}n con AE, tanto para bancos donde los par{\'a}metros a y b correlacionan como para bancos donde no. Para ambos tipos de bancos, los m{\'e}todos de bloqueo de b (Chang, Qian y Ying, 2001) mejoran la seguridad del banco.}, keywords = {*Artificial Intelligence, *Microcomputers, *Psychological Tests, *Software Design, Algorithms, Chi-Square Distribution, Humans, Likelihood Functions}, isbn = {0214-9915 (Print)}, author = {Barrada, J and Mazuela, P. and Olea, J.} } @article {174, title = {Measurement precision and efficiency of multidimensional computer adaptive testing of physical functioning using the pediatric evaluation of disability inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {87}, number = {9}, year = {2006}, note = {Haley, Stephen MNi, PengshengLudlow, Larry HFragala-Pinkham, Maria AK02 hd45354-01/hd/nichdResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 
2006 Sep;87(9):1223-9.}, month = {Sep}, pages = {1223-9}, edition = {2006/08/29}, abstract = {OBJECTIVE: To compare the measurement efficiency and precision of a multidimensional computer adaptive testing (M-CAT) application to a unidimensional CAT (U-CAT) comparison using item bank data from 2 of the functional skills scales of the Pediatric Evaluation of Disability Inventory (PEDI). DESIGN: Using existing PEDI mobility and self-care item banks, we compared the stability of item calibrations and model fit between unidimensional and multidimensional Rasch models and compared the efficiency and precision of the U-CAT- and M-CAT-simulated assessments to a random draw of items. SETTING: Pediatric rehabilitation hospital and clinics. PARTICIPANTS: Clinical and normative samples. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Not applicable. RESULTS: The M-CAT had greater levels of precision and efficiency than the separate mobility and self-care U-CAT versions when using a similar number of items for each PEDI subdomain. Equivalent estimation of mobility and self-care scores can be achieved with a 25\% to 40\% item reduction with the M-CAT compared with the U-CAT. CONCLUSIONS: M-CAT applications appear to have both precision and efficiency advantages compared with separate U-CAT assessments when content subdomains have a high correlation. Practitioners may also realize interpretive advantages of reporting test score information for each subdomain when separate clinical inferences are desired.}, keywords = {*Disability Evaluation, *Pediatrics, Adolescent, Child, Child, Preschool, Computers, Disabled Persons/*classification/rehabilitation, Efficiency, Humans, Infant, Outcome Assessment (Health Care), Psychometrics, Self Care}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Ni, P. and Ludlow, L. H. and Fragala-Pinkham, M. A.} } @article {311, title = {Multidimensional computerized adaptive testing of the EORTC QLQ-C30: basic developments and evaluations}, journal = {Quality of Life Research}, volume = {15}, number = {3}, year = {2006}, note = {Petersen, Morten AaGroenvold, MogensAaronson, NeilFayers, PeterSprangers, MirjamBjorner, Jakob BEuropean Organisation for Research and Treatment of Cancer Quality of Life GroupResearch Support, Non-U.S. Gov{\textquoteright}tNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Apr;15(3):315-29.}, month = {Apr}, pages = {315-29}, edition = {2006/03/21}, abstract = {OBJECTIVE: Self-report questionnaires are widely used to measure health-related quality of life (HRQOL). Ideally, such questionnaires should be adapted to the individual patient and at the same time scores should be directly comparable across patients. This may be achieved using computerized adaptive testing (CAT). Usually, CAT is carried out for a single domain at a time. However, many HRQOL domains are highly correlated. Multidimensional CAT may utilize these correlations to improve measurement efficiency. We investigated the possible advantages and difficulties of multidimensional CAT. STUDY DESIGN AND SETTING: We evaluated multidimensional CAT of three scales from the EORTC QLQ-C30: the physical functioning, emotional functioning, and fatigue scales. Analyses utilised a database with 2958 European cancer patients. 
RESULTS: It was possible to obtain scores for the three domains with five to seven items administered using multidimensional CAT that were very close to the scores obtained using all 12 items and with no or little loss of measurement precision. CONCLUSION: The findings suggest that multidimensional CAT may significantly improve measurement precision and efficiency and encourage further research into multidimensional CAT. Particularly, the estimation of the model underlying the multidimensional CAT and the conceptual aspects need further investigations.}, keywords = {*Quality of Life, *Self Disclosure, Adult, Female, Health Status, Humans, Male, Middle Aged, Questionnaires/*standards, User-Computer Interface}, isbn = {0962-9343 (Print)}, author = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Fayers, P. and Sprangers, M. and Bjorner, J. B.} } @conference {838, title = {Multiple maximum exposure rates in computerized adaptive testing}, booktitle = {Paper presented at the SMABS-EAM Conference}, year = {2006}, address = {Budapest, Hungary}, author = {Barrada, J and Veldkamp, B. P. and Olea, J.} } @article {2218, title = {Multistage Testing: Widely or Narrowly Applicable?}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {257-260}, doi = {10.1207/s15324818ame1903_6}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_6}, author = {Stark, Stephen and Chernyshenko, Oleksandr S.} } @article {181, title = {Optimal and nonoptimal computer-based test designs for making pass-fail decisions}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {221-239}, publisher = {Lawrence Erlbaum: US}, abstract = {Now that many credentialing exams are being routinely administered by computer, new computer-based test designs, along with item response theory models, are being aggressively researched to identify specific designs that can increase the decision consistency and accuracy of pass-fail decisions. The purpose of this study was to investigate the impact of optimal and nonoptimal multistage test (MST) designs, linear parallel-form test designs (LPFT), and computer adaptive test (CAT) designs on the decision consistency and accuracy of pass-fail decisions. Realistic testing situations matching those of one of the large credentialing agencies were simulated to increase the generalizability of the findings. The conclusions were clear: (a) With the LPFTs, matching test information functions (TIFs) to the mean of the proficiency distribution produced slightly better results than matching them to the passing score; (b) all of the test designs worked better than test construction using random selection of items, subject to content constraints only; (c) CAT performed better than the other test designs; and (d) if matching a TIF to the passing score, the MST design produced a bit better results than the LPFT design. If an argument for the MST design is to be made, it can be made on the basis of slight improvements over the LPFT design and better expected item bank utilization, candidate preference, and the potential for improved diagnostic feedback, compared with the feedback that is possible with fixed linear test forms. 
(PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {adaptive test, credentialing exams, Decision Making, Educational Measurement, multistage tests, optimal computer-based test designs, test form}, isbn = {0895-7347 (Print); 1532-4818 (Electronic)}, author = {Hambleton, R. K. and Xing, D.} } @article {2147, title = {Optimal Testing With Easy or Difficult Items in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {30}, number = {5}, year = {2006}, pages = {379-393}, abstract = {

Computerized adaptive tests (CATs) are individualized tests that, from a measurement point of view, are optimal for each individual, possibly under some practical conditions. In the present study, it is shown that maximum information item selection in CATs using an item bank that is calibrated with the one- or the two-parameter logistic model results in each individual answering about 50\% of the items correctly. Two item selection procedures giving easier (or more difficult) tests for students are presented and evaluated. Item selection on probability points of items yields good results only with the one-parameter logistic model and not with the two-parameter logistic model. An alternative selection procedure, based on maximum information at a shifted ability level, gives satisfactory results with both models. Index terms: computerized adaptive testing, item selection, item response theory

}, doi = {10.1177/0146621606288890}, url = {http://apm.sagepub.com/content/30/5/379.abstract}, author = {Theo Eggen and Verschoor, Angela J.} } @article {116, title = {Optimal testing with easy or difficult items in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {30}, number = {5}, year = {2006}, pages = {379-393}, publisher = {Sage Publications: US}, abstract = {Computerized adaptive tests (CATs) are individualized tests that, from a measurement point of view, are optimal for each individual, possibly under some practical conditions. In the present study, it is shown that maximum information item selection in CATs using an item bank that is calibrated with the one- or the two-parameter logistic model results in each individual answering about 50\% of the items correctly. Two item selection procedures giving easier (or more difficult) tests for students are presented and evaluated. Item selection on probability points of items yields good results only with the one-parameter logistic model and not with the two-parameter logistic model. An alternative selection procedure, based on maximum information at a shifted ability level, gives satisfactory results with both models. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive tests, individualized tests, Item Response Theory, item selection, Measurement}, isbn = {0146-6216 (Print)}, author = {Theo Eggen and Verschoor, Angela J.} } @article {2146, title = {Optimal Testlet Pool Assembly for Multistage Testing Designs}, journal = {Applied Psychological Measurement}, volume = {30}, number = {3}, year = {2006}, pages = {204-215}, abstract = {

Computerized multistage testing (MST) designs require sets of test questions (testlets) to be assembled to meet strict, often competing criteria. Rules that govern testlet assembly may dictate the number of questions on a particular subject or may describe desirable statistical properties for the test, such as measurement precision. In an MST design, testlets of differing difficulty levels must be created. Statistical properties for assembly of the testlets can be expressed using item response theory (IRT) parameters. The testlet test information function (TIF) value can be maximized at a specific point on the IRT ability scale. In practical MST designs, parallel versions of testlets are needed, so sets of testlets with equivalent properties are built according to equivalent specifications. In this project, the authors study the use of a mathematical programming technique to simultaneously assemble testlets to ensure equivalence and fairness to candidates who may be administered different testlets.

}, doi = {10.1177/0146621605284350}, url = {http://apm.sagepub.com/content/30/3/204.abstract}, author = {Ariel, Adelaide and Veldkamp, Bernard P. and Breithaupt, Krista} } @article {384, title = {Overview of quantitative measurement methods. Equivalence, invariance, and differential item functioning in health applications}, journal = {Medical Care}, volume = {44}, number = {11 Suppl 3}, year = {2006}, note = {Teresi, Jeanne AAG15294/AG/NIA NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}tReviewUnited StatesMedical careMed Care. 2006 Nov;44(11 Suppl 3):S39-49.}, month = {Nov}, pages = {S39-49}, edition = {2006/10/25}, abstract = {BACKGROUND: Reviewed in this article are issues relating to the study of invariance and differential item functioning (DIF). The aim of factor analyses and DIF, in the context of invariance testing, is the examination of group differences in item response conditional on an estimate of disability. Discussed are parameters and statistics that are not invariant and cannot be compared validly in crosscultural studies with varying distributions of disability in contrast to those that can be compared (if the model assumptions are met) because they are produced by models such as linear and nonlinear regression. OBJECTIVES: The purpose of this overview is to provide an integrated approach to the quantitative methods used in this special issue to examine measurement equivalence. The methods include classical test theory (CTT), factor analytic, and parametric and nonparametric approaches to DIF detection. Also included in the quantitative section is a discussion of item banking and computerized adaptive testing (CAT). METHODS: Factorial invariance and the articles discussing this topic are introduced. A brief overview of the DIF methods presented in the quantitative section of the special issue is provided together with a discussion of ways in which DIF analyses and examination of invariance using factor models may be complementary. CONCLUSIONS: Although factor analytic and DIF detection methods share features, they provide unique information and can be viewed as complementary in informing about measurement equivalence.}, keywords = {*Cross-Cultural Comparison, Data Interpretation, Statistical, Factor Analysis, Statistical, Guidelines as Topic, Humans, Models, Statistical, Psychometrics/*methods, Statistics as Topic/*methods, Statistics, Nonparametric}, isbn = {0025-7079 (Print)0025-7079 (Linking)}, author = {Teresi, J. A.} } @article {2130, title = {Sensitivity of a computer adaptive assessment for measuring functional mobility changes in children enrolled in a community fitness programme}, journal = {Clin Rehabil}, volume = {20}, number = {6}, year = {2006}, pages = {616-622}, author = {Haley, S. M. and Fragala-Pinkham, M. A. 
and Ni, P.} } @article {2232, title = {Sequential Computerized Mastery Tests{\textemdash}Three Simulation Studies}, journal = {International Journal of Testing}, volume = {6}, number = {1}, year = {2006}, pages = {41-55}, doi = {10.1207/s15327574ijt0601_3}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15327574ijt0601_3}, author = {Wiberg, Marie} } @article {319, title = {SIMCAT 1.0: A SAS computer program for simulating computer adaptive testing}, journal = {Applied Psychological Measurement}, volume = {30}, number = {1}, year = {2006}, pages = {60-61}, publisher = {Sage Publications: US}, abstract = {Monte Carlo methodologies are frequently applied to study the sampling distribution of the estimated proficiency level in adaptive testing. These methods eliminate real situational constraints. However, these Monte Carlo methodologies are not currently supported by the available software programs, and when these programs are available, their flexibility is limited. SIMCAT 1.0 is aimed at the simulation of adaptive testing sessions under different adaptive expected a posteriori (EAP) proficiency-level estimation methods (Blais \& Ra{\^\i}che, 2005; Ra{\^\i}che \& Blais, 2005) based on the one-parameter Rasch logistic model. These methods are all adaptive in the a priori proficiency-level estimation, the proficiency-level estimation bias correction, the integration interval, or a combination of these factors. The use of these adaptive EAP estimation methods diminishes considerably the shrinking, and therefore biasing, effect of the estimated a priori proficiency level encountered when this a priori is fixed at a constant value independently of the computed previous value of the proficiency level. SIMCAT 1.0 also computes empirical and estimated skewness and kurtosis coefficients, as well as the standard error, of the estimated proficiency-level sampling distribution. In this way, the program allows one to compare empirical and estimated properties of the estimated proficiency-level sampling distribution under different variations of the EAP estimation method: standard error and bias, as well as the skewness and kurtosis coefficients. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive testing, computer program, estimated proficiency level, Monte Carlo methodologies, Rasch logistic model}, isbn = {0146-6216 (Print)}, author = {Ra{\^\i}che, G. and Blais, J-G.} } @article {566, title = {Simulated computerized adaptive test for patients with lumbar spine impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, year = {2006}, pages = {947-956}, author = {Hart, D. and Mioduski, J. and Werneke, M. and Stratford, P.} } @article {2073, title = {Simulated computerized adaptive test for patients with lumbar spine impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, year = {2006}, pages = {947{\textendash}956}, abstract = {Objective: To equate physical functioning (PF) items with Back Pain Functional Scale (BPFS) items, develop a computerized adaptive test (CAT) designed to assess lumbar spine functional status (LFS) in people with lumbar spine impairments, and compare discriminant validity of LFS measures (qIRT) generated using all items analyzed with a rating scale Item Response Theory model (RSM) and measures generated using the simulated CAT (qCAT). 
Methods: We performed a secondary analysis of retrospective intake rehabilitation data. Results: Unidimensionality and local independence of 25 BPFS and PF items were supported. Differential item functioning was negligible for levels of symptom acuity, gender, age, and surgical history. The RSM fit the data well. A lumbar spine specific CAT was developed that was 72\% more efficient than using all 25 items to estimate LFS measures. qIRT and qCAT measures did not discriminate patients by symptom acuity, age, or gender, but discriminated patients by surgical history in similar clinically logical ways. qCAT measures were as precise as qIRT measures. Conclusion: A body part specific simulated CAT developed from an LFS item bank was efficient and produced precise measures of LFS without eroding discriminant validity.}, keywords = {Back Pain Functional Scale, computerized adaptive testing, Item Response Theory, Lumbar spine, Rehabilitation, True-score equating}, doi = {10.1016/j.jclinepi.2005.10.017}, author = {Hart, D. L. and Mioduski, J. E. and Werneke, M. W. and Stratford, P. W.} } @article {184, title = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {3}, year = {2006}, note = {0895-4356 (Print)Journal ArticleValidation Studies}, pages = {290-8}, abstract = {BACKGROUND AND OBJECTIVE: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items, develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (theta(IRT)) and measures generated using the simulated CAT (theta(CAT)). STUDY DESIGN AND SETTING: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients with shoulder impairments who completed 60 SFS items. RESULTS: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The theta(IRT) and theta(CAT) measures were highly correlated (r = .96) and resulted in similar classifications of patients. CONCLUSION: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good discriminating ability.}, keywords = {*Computer Simulation, *Range of Motion, Articular, Activities of Daily Living, Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Prospective Studies, Reproducibility of Results, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Shoulder Dislocation/*physiopathology/psychology/rehabilitation, Shoulder Pain/*physiopathology/psychology/rehabilitation, Shoulder/*physiopathology, Sickness Impact Profile, Treatment Outcome}, author = {Hart, D. L. and Cook, K. F. and Mioduski, J. E. and Teal, C. R. and Crane, P. K.} } @article {2074, title = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, year = {2006}, pages = {290-298}, abstract = {

Background and Objective: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items, develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (qIRT) and measures generated using the simulated CAT (qCAT). Study Design and Setting: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients with shoulder impairments who completed 60 SFS items. Results: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The qIRT and qCAT measures were highly correlated (r = .96) and resulted in similar classifications of patients. Conclusion: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good discriminating ability.

}, keywords = {computerized adaptive testing, Flexilevel Scale of Shoulder Function, Item Response Theory, Rehabilitation}, author = {Hart, D. L. and Cook, K. F. and Mioduski, J. E. and Teal, C. R. and Crane, P. K.} } @article {296, title = {T{\'e}cnicas para detectar patrones de respuesta at{\'\i}picos [Aberrant patterns detection methods]}, journal = {Anales de Psicolog{\'\i}a}, volume = {22}, number = {1}, year = {2006}, note = {Spain: Universidad de Murcia}, pages = {143-154}, abstract = {La identificaci{\'o}n de patrones de respuesta at{\'\i}picos es de gran utilidad para la construcci{\'o}n de tests y de bancos de {\'\i}tems con propiedades psicom{\'e}tricas as{\'\i} como para el an{\'a}lisis de validez de los mismos. En este trabajo de revisi{\'o}n se han recogido los m{\'a}s relevantes y novedosos m{\'e}todos de ajuste de personas que se han elaborado dentro de cada uno de los principales {\'a}mbitos de trabajo de la Psicometr{\'\i}a: el escalograma de Guttman, la Teor{\'\i}a Cl{\'a}sica de Tests (TCT), la Teor{\'\i}a de la Generalizabilidad (TG), la Teor{\'\i}a de Respuesta al {\'I}tem (TRI), los Modelos de Respuesta al {\'I}tem No Param{\'e}tricos (MRINP), los Modelos de Clase Latente de Orden Restringido (MCL-OR) y el An{\'a}lisis de Estructura de Covarianzas (AEC).Aberrant patterns detection has a great usefulness in order to make tests and item banks with psychometric characteristics and validity analysis of tests and items. The most relevant and newest person-fit methods have been reviewed. All of them have been made in each one of main areas of Psychometry: Guttman{\textquoteright}s scalogram, Classical Test Theory (CTT), Generalizability Theory (GT), Item Response Theory (IRT), Non-parametric Response Models (NPRM), Order-Restricted Latent Class Models (OR-LCM) and Covariance Structure Analysis (CSA).}, keywords = {aberrant patterns detection, Classical Test Theory, generalizability theory, Item Response, Item Response Theory, Mathematics, methods, person-fit, Psychometrics, psychometry, Test Validity, test validity analysis, Theory}, isbn = {0212-9728}, author = {N{\'u}{\~n}ez, R. M. N. and Pina, J. A. L.} } @article {2215, title = {A testlet assembly design for the uniform CPA Examination}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {189-202}, doi = {10.1207/s15324818ame1903_2}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1903_2}, author = {Luecht, Richard and Brumfield, Terry and Breithaupt, Krista} } @mastersthesis {2272, title = {Validit{\"a}tssteigerungen durch adaptives Testen [Increasing validity by adaptive testing]. }, year = {2006}, type = {Doctoral}, author = {Frey, A.} } @conference {1059, title = {A variant of the progressive restricted item exposure control procedure in computerized adaptive testing systems based on the 3PL and the partial credit model}, booktitle = {Paper presented at the annual meetings of the American Educational Research Association}, year = {2006}, address = {San Francisco}, author = {McClarty, L. K. and Sperling, R. and Dodd, B. G.} } @inbook {1759, title = {Adaptive orientation methods in computer adaptive testing}, year = {2005}, address = {Proceedings E-Learn 2005 World Conference on E-Learning in Corporate, Government, Healthcare, and Higher Education, pp. 1290-1295, Vancouver, Canada, AACE, October 2005.}, author = {Economides, A. 
A.} } @book {1702, title = {Adaptive selection of personality items to inform a neural network predicting job performance}, year = {2005}, note = {{PDF file, 488 KB}}, address = {Unpublished doctoral dissertation, University of Washington}, author = {Thissen-Roe, A.} } @inbook {180, title = {Applications of item response theory to improve health outcomes assessment: Developing item banks, linking instruments, and computer-adaptive testing}, booktitle = {Outcomes assessment in cancer}, year = {2005}, note = {Using Smart Source ParsingOutcomes assessment in cancer: Measures, methods, and applications. (pp. 445-464). New York, NY : Cambridge University Press. xiv, 662 pp}, pages = {445-464}, publisher = {Cambridge University Press}, organization = {Cambridge University Press}, address = {Cambridge, UK}, abstract = {(From the chapter) The current chapter builds on Reise{\textquoteright}s introduction to the basic concepts, assumptions, popular models, and important features of IRT and discusses the applications of item response theory (IRT) modeling to health outcomes assessment. In particular, we highlight the critical role of IRT modeling in: developing an instrument to match a study{\textquoteright}s population; linking two or more instruments measuring similar constructs on a common metric; and creating item banks that provide the foundation for tailored short-form instruments or for computerized adaptive assessments. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, Health, Item Response Theory, Measurement, Test Construction, Treatment Outcomes}, author = {Hambleton, R. K.}, editor = {C. C. Gotay and C. Snyder} } @article {166, title = {Assessing Mobility in Children Using a Computer Adaptive Testing Version of the Pediatric Evaluation of Disability Inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {86}, number = {5}, year = {2005}, pages = {932-939}, isbn = {00039993}, author = {Haley, S. and Raczek, A. and Coster, W. and Dumas, H. and Fragalapinkham, M.} } @article {175, title = {Assessing mobility in children using a computer adaptive testing version of the pediatric evaluation of disability inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {86}, number = {5}, year = {2005}, note = {Haley, Stephen MRaczek, Anastasia ECoster, Wendy JDumas, Helene MFragala-Pinkham, Maria AK02 hd45354-01a1/hd/nichdR43 hd42388-01/hd/nichdResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2005 May;86(5):932-9.}, month = {May}, pages = {932-9}, edition = {2005/05/17}, abstract = {OBJECTIVE: To assess score agreement, validity, precision, and response burden of a prototype computerized adaptive testing (CAT) version of the Mobility Functional Skills Scale (Mob-CAT) of the Pediatric Evaluation of Disability Inventory (PEDI) as compared with the full 59-item version (Mob-59). DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; and cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics, community-based day care, preschool, and children{\textquoteright}s homes. 
PARTICIPANTS: Four hundred sixty-nine children with disabilities and 412 children with no disabilities (analytic sample); 41 children without disabilities and 39 with disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from a prototype Mob-CAT application and versions using 15-, 10-, and 5-item stopping rules; scores from the Mob-59; and number of items and time (in seconds) to administer assessments. RESULTS: Mob-CAT scores from both computer simulations (intraclass correlation coefficient [ICC] range, .94-.99) and field administrations (ICC=.98) were in high agreement with scores from the Mob-59. Using computer simulations of retrospective data, discriminant validity, and sensitivity to change of the Mob-CAT closely approximated that of the Mob-59, especially when using the 15- and 10-item stopping rule versions of the Mob-CAT. The Mob-CAT used no more than 15\% of the items for any single administration, and required 20\% of the time needed to administer the Mob-59. CONCLUSIONS: Comparable score estimates for the PEDI mobility scale can be obtained from CAT administrations, with losses in validity and precision for shorter forms, but with a considerable reduction in administration time.}, keywords = {*Computer Simulation, *Disability Evaluation, Adolescent, Child, Child, Preschool, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Male, Outcome Assessment (Health Care)/*methods, Rehabilitation Centers, Rehabilitation/*standards, Sensitivity and Specificity}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Raczek, A. E. and Coster, W. J. and Dumas, H. M. and Fragala-Pinkham, M. A.} } @article {2095, title = {An Authoring Environment for Adaptive Testing}, journal = {Educational Technology \& Society}, volume = {8}, year = {2005}, pages = {66-76}, abstract = {

SIETTE is a web-based adaptive testing system. It implements Computerized Adaptive Tests. These tests are tailor-made, theory-based tests, where the questions shown to students, the finalization of the test, and the estimation of student knowledge are all accomplished adaptively. To construct these tests, SIETTE has an authoring environment comprising a suite of tools that helps teachers create questions and tests properly, and analyze students{\textquoteright} performance after taking a test. In this paper, we present this authoring environment in the framework of adaptive testing. As will be shown, this set of visual tools, which contains some adaptable features, can be useful for teachers lacking skills in this kind of testing. Additionally, other systems that implement adaptive testing are also reviewed.

}, keywords = {Adaptability, Adaptive Testing, Authoring environment, Item Response Theory}, author = {Guzm{\'a}n, E and Conejo, R and Garc{\'\i}a-Herv{\'a}s, E} } @article {2231, title = {Automated Simultaneous Assembly for Multistage Testing}, journal = {International Journal of Testing}, volume = {5}, number = {3}, year = {2005}, pages = {319-330}, doi = {10.1207/s15327574ijt0503_8}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15327574ijt0503_8}, author = {Breithaupt, Krista and Ariel, Adelaide and Veldkamp, Bernard P.} } @article {102, title = {A Bayesian student model without hidden nodes and its comparison with item response theory}, journal = {International Journal of Artificial Intelligence in Education}, volume = {15}, number = {4}, year = {2005}, pages = {291-323}, publisher = {IOS Press: Netherlands}, abstract = {The Bayesian framework offers a number of techniques for inferring an individual{\textquoteright}s knowledge state from evidence of mastery of concepts or skills. A typical application where such a technique can be useful is Computer Adaptive Testing (CAT). A Bayesian modeling scheme, POKS, is proposed and compared to the traditional Item Response Theory (IRT), which has been the prevalent CAT approach for the last three decades. POKS is based on the theory of knowledge spaces and constructs item-to-item graph structures without hidden nodes. It aims to offer an effective knowledge assessment method with an efficient algorithm for learning the graph structure from data. We review the different Bayesian approaches to modeling student ability assessment and discuss how POKS relates to them. The performance of POKS is compared to the IRT two parameter logistic model. Experimental results over a 34 item Unix test and a 160 item French language test show that both approaches can classify examinees as master or non-master effectively and efficiently, with relatively comparable performance. However, more significant differences are found in favor of POKS for a second task that consists in predicting individual question item outcome. Implications of these results for adaptive testing and student modeling are discussed, as well as the limitations and advantages of POKS, namely the issue of integrating concepts into its structure. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {Bayesian Student Model, computer adaptive testing, hidden nodes, Item Response Theory}, isbn = {1560-4292 (Print); 1560-4306 (Electronic)}, author = {Desmarais, M. C. and Pu, X.} } @article {2128, title = {A closer look at using judgments of item difficulty to change answers on computerized adaptive tests}, journal = {Journal of Educational Measurement}, volume = {42}, number = {4}, year = {2005}, pages = {331-350}, author = {Vispoel, W. P. and Clough, S. J. and Bleiler, T.} } @book {1676, title = {A comparison of adaptive mastery testing using testlets with the 3-parameter logistic model}, year = {2005}, address = {Unpublished doctoral dissertation, University of Minnesota, Minneapolis, MN}, author = {Jacobs-Cassuto, M.S.} } @article {741, title = {A comparison of item-selection methods for adaptive tests with content constraints}, journal = {Journal of Educational Measurement}, volume = {42}, year = {2005}, pages = {283-302}, author = {van der Linden, W. 
J.} } @article {398, title = {A comparison of item-selection methods for adaptive tests with content constraints}, journal = {Journal of Educational Measurement}, volume = {42}, number = {3}, year = {2005}, pages = {283-302}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {In test assembly, a fundamental difference exists between algorithms that select a test sequentially or simultaneously. Sequential assembly allows us to optimize an objective function at the examinee{\textquoteright}s ability estimate, such as the test information function in computerized adaptive testing. But it leads to the non-trivial problem of how to realize a set of content constraints on the test{\textemdash}a problem more naturally solved by a simultaneous item-selection method. Three main item-selection methods in adaptive testing offer solutions to this dilemma. The spiraling method moves item selection across categories of items in the pool proportionally to the numbers needed from them. Item selection by the weighted-deviations method (WDM) and the shadow test approach (STA) is based on projections of the future consequences of selecting an item. These two methods differ in that the former calculates a projection of a weighted sum of the attributes of the eventual test and the latter a projection of the test itself. The pros and cons of these methods are analyzed. An empirical comparison between the WDM and STA was conducted for an adaptive version of the Law School Admission Test (LSAT), which showed equally good item-exposure rates but violations of some of the constraints and larger bias and inaccuracy of the ability estimator for the WDM.}, keywords = {Adaptive Testing, Algorithms, content constraints, item selection method, shadow test approach, spiraling method, weighted deviations method}, isbn = {0022-0655 (Print)}, author = {van der Linden, W. J.} } @article {150, title = {Computer adaptive testing}, journal = {Journal of Applied Measurement}, volume = {6}, number = {1}, year = {2005}, note = {Gershon, Richard CReviewUnited StatesJournal of applied measurementJ Appl Meas. 2005;6(1):109-27.}, pages = {109-27}, edition = {2005/02/11}, abstract = {The creation of item response theory (IRT) and Rasch models, inexpensive accessibility to high speed desktop computers, and the growth of the Internet, has led to the creation and growth of computerized adaptive testing or CAT. This form of assessment is applicable for both high stakes tests such as certification or licensure exams, as well as health related quality of life surveys. This article discusses the historical background of CAT including its many advantages over conventional (typically paper and pencil) alternatives. The process of CAT is then described including descriptions of the specific differences of using CAT based upon 1-, 2- and 3-parameter IRT and various Rasch models. Numerous specific topics describing CAT in practice are described including: initial item selection, content balancing, test difficulty, test length and stopping rules. The article concludes with the author{\textquoteright}s reflections regarding the future of CAT.}, keywords = {*Internet, *Models, Statistical, *User-Computer Interface, Certification, Health Surveys, Humans, Licensure, Microcomputers, Quality of Life}, isbn = {1529-7713 (Print)}, author = {Gershon, R. C.} } @article {556, title = {Computer adaptive testing}, journal = {Journal of Applied Measurement }, volume = {6}, year = {2005}, pages = {109-27}, author = {Gershon, R. 
C.} } @article {171, title = {A computer adaptive testing approach for assessing physical functioning in children and adolescents}, journal = {Developmental Medicine and Child Neuropsychology}, volume = {47}, number = {2}, year = {2005}, note = {Haley, Stephen MNi, PengshengFragala-Pinkham, Maria ASkrinar, Alison MCorzo, DeyaniraComparative StudyResearch Support, Non-U.S. Gov{\textquoteright}tEnglandDevelopmental medicine and child neurologyDev Med Child Neurol. 2005 Feb;47(2):113-20.}, month = {Feb}, pages = {113-120}, edition = {2005/02/15}, abstract = {The purpose of this article is to demonstrate: (1) the accuracy and (2) the reduction in amount of time and effort in assessing physical functioning (self-care and mobility domains) of children and adolescents using computer-adaptive testing (CAT). A CAT algorithm selects questions directly tailored to the child{\textquoteright}s ability level, based on previous responses. Using a CAT algorithm, a simulation study was used to determine the number of items necessary to approximate the score of a full-length assessment. We built simulated CAT (5-, 10-, 15-, and 20-item versions) for self-care and mobility domains and tested their accuracy in a normative sample (n=373; 190 males, 183 females; mean age 6y 11mo [SD 4y 2m], range 4mo to 14y 11mo) and a sample of children and adolescents with Pompe disease (n=26; 21 males, 5 females; mean age 6y 1mo [SD 3y 10mo], range 5mo to 14y 10mo). Results indicated that comparable score estimates (based on computer simulations) to the full-length tests can be achieved in a 20-item CAT version for all age ranges and for normative and clinical samples. No more than 13 to 16\% of the items in the full-length tests were needed for any one administration. These results support further consideration of using CAT programs for accurate and efficient clinical assessments of physical functioning.}, keywords = {*Computer Systems, Activities of Daily Living, Adolescent, Age Factors, Child, Child Development/*physiology, Child, Preschool, Computer Simulation, Confidence Intervals, Demography, Female, Glycogen Storage Disease Type II/physiopathology, Health Status Indicators, Humans, Infant, Infant, Newborn, Male, Motor Activity/*physiology, Outcome Assessment (Health Care)/*methods, Reproducibility of Results, Self Care, Sensitivity and Specificity}, isbn = {0012-1622 (Print)}, author = {Haley, S. M. and Ni, P. and Fragala-Pinkham, M. A. and Skrinar, A. M. and Corzo, D.} } @inbook {1760, title = {Computer adaptive testing quality requirements}, year = {2005}, address = {Proceedings E-Learn 2005 World Conference on E-Learning in Corporate, Government, Healthcare, and Higher Education, pp. 288-295, Vancouver, Canada, AACE, October 2005.}, author = {Economides, A. A.} } @article {192, title = {A computer-assisted test design and diagnosis system for use by classroom teachers}, journal = {Journal of Computer Assisted Learning}, volume = {21}, number = {6}, year = {2005}, pages = {419-429}, abstract = {Computer-assisted assessment (CAA) has become increasingly important in education in recent years. A variety of computer software systems have been developed to help assess the performance of students at various levels. However, such systems are primarily designed to provide objective assessment of students and analysis of test items, and focus has been mainly placed on higher and further education. 
Although there are commercial professional systems available for use by primary and secondary educational institutions, such systems are generally expensive and require skilled expertise to operate. In view of the rapid progress made in the use of computer-based assessment for primary and secondary students by education authorities here in the UK and elsewhere, there is a need to develop systems which are economic and easy to use and can provide the necessary information that can help teachers improve students{\textquoteright} performance. This paper presents the development of a software system that provides a range of functions including generating items and building item banks, designing tests, conducting tests on computers and analysing test results. Specifically, the system can generate information on the performance of students and test items that can be easily used to identify curriculum areas where students are under performing. A case study based on data collected from five secondary schools in Hong Kong involved in the Curriculum, Evaluation and Management Centre{\textquoteright}s Middle Years Information System Project, Durham University, UK, has been undertaken to demonstrate the use of the system for diagnostic and performance analysis. (PsycINFO Database Record (c) 2006 APA ) (journal abstract)}, keywords = {Computer Assisted Testing, Computer Software, Diagnosis, Educational Measurement, Teachers}, author = {He, Q. and Tymms, P.} } @article {249, title = {Computerized adaptive testing: a mixture item selection approach for constrained situations}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {58}, number = {2}, year = {2005}, note = {Leung, Chi-KeungChang, Hua-HuaHau, Kit-TaiEnglandBr J Math Stat Psychol. 2005 Nov;58(Pt 2):239-57.}, month = {Nov}, pages = {239-57}, edition = {2005/11/19}, abstract = {In computerized adaptive testing (CAT), traditionally the most discriminating items are selected to provide the maximum information so as to attain the highest efficiency in trait (theta) estimation. The maximum information (MI) approach typically results in unbalanced item exposure and hence high item-overlap rates across examinees. Recently, Yi and Chang (2003) proposed the multiple stratification (MS) method to remedy the shortcomings of MI. In MS, items are first sorted according to content, then difficulty and finally discrimination parameters. As discriminating items are used strategically, MS offers a better utilization of the entire item pool. However, for testing with imposed non-statistical constraints, this new stratification approach may not maintain its high efficiency. Through a series of simulation studies, this research explored the possible benefits of a mixture item selection approach (MS-MI), integrating the MS and MI approaches, in testing with non-statistical constraints. In all simulation conditions, MS consistently outperformed the other two competing approaches in item pool utilization, while the MS-MI and the MI approaches yielded higher measurement efficiency and offered better conformity to the constraints. Furthermore, the MS-MI approach was shown to perform better than MI on all evaluation criteria when control of item exposure was imposed.}, keywords = {*Computer-Aided Design, *Educational Measurement/methods, *Models, Psychological, Humans, Psychometrics/methods}, isbn = {0007-1102 (Print)0007-1102 (Linking)}, author = {Leung, C. K. and Chang, Hua-Hua and Hau, K. 
T.} } @article {2143, title = {Computerized Adaptive Testing With the Partial Credit Model: Estimation Procedures, Population Distributions, and Item Pool Characteristics}, journal = {Applied Psychological Measurement}, volume = {29}, number = {6}, year = {2005}, pages = {433-456}, abstract = {

The primary purpose of this research is to examine the impact of estimation methods, actual latent trait distributions, and item pool characteristics on the performance of a simulated computerized adaptive testing (CAT) system. In this study, three estimation procedures are compared for accuracy of estimation: maximum likelihood estimation (MLE), expected a posteriori (EAP), and Warm{\textquoteright}s weighted likelihood estimation (WLE). Some research has shown that MLE and EAP perform equally well under certain conditions in polytomous CAT systems, such that they match the actual latent trait distribution. However, little research has compared these methods when prior estimates of the latent trait distribution are extremely poor. In general, it appears that MLE, EAP, and WLE procedures perform equally well when using an optimal item pool. However, the use of EAP procedures may be advantageous under nonoptimal testing conditions when the item pool is not appropriately matched to the examinees.

}, doi = {10.1177/0146621605280072}, url = {http://apm.sagepub.com/content/29/6/433.abstract}, author = {Gorin, Joanna S. and Dodd, Barbara G. and Fitzpatrick, Steven J. and Shieh, Yann Yann} } @article {558, title = {Computerized adaptive testing with the partial credit model: Estimation procedures, population distributions, and item pool characteristics}, journal = {Applied Psychological Measurement}, volume = {29}, year = {2005}, pages = {533-546}, author = {Gorin, J. and Dodd, B. G. and Fitzpatrick, S. J. and Shieh, Y. Y.} } @article {159, title = {Computerized adaptive testing with the partial credit model: Estimation procedures, population distributions, and item pool characteristics}, journal = {Applied Psychological Measurement}, volume = {29}, number = {6}, year = {2005}, pages = {433-456}, isbn = {0146-6216}, author = {Gorin, J. S.} } @article {2207, title = {Computerizing statewide assessments in Minnesota: A report on the feasibility of converting the Minnesota Comprehensive Assessments to a computerized adaptive format}, year = {2005}, publisher = {Office of Educational Accountability, College of Education and Human Development, University of Minnesota}, author = {Peterson, K.A. and Davison. M. L. and Hjelseth, L.} } @booklet {1540, title = {Constraining item exposure in computerized adaptive testing with shadow tests}, year = {2005}, address = {Law School Admission Council Computerized Testing Report 02-03}, author = {van der Linden, W. J. and Veldkamp, B. P.} } @article {2197, title = {Constructing a Computerized Adaptive Test for University Applicants With Disabilities}, journal = {Applied Measurement in Education}, volume = {18}, number = {4}, year = {2005}, pages = {381-405}, doi = {10.1207/s15324818ame1804_3}, url = {http://www.tandfonline.com/doi/abs/10.1207/s15324818ame1804_3}, author = {Moshinsky, Avital and Kazin, Cathrael} } @article {211, title = {Contemporary measurement techniques for rehabilitation outcomes assessment}, journal = {Journal of Rehabilitation Medicine}, volume = {37}, number = {6}, year = {2005}, note = {1650-1977 (Print)Journal ArticleReview}, pages = {339-345}, abstract = {In this article, we review the limitations of traditional rehabilitation functional outcome instruments currently in use within the rehabilitation field to assess Activity and Participation domains as defined by the International Classification of Function, Disability, and Health. These include a narrow scope of functional outcomes, data incompatibility across instruments, and the precision vs feasibility dilemma. Following this, we illustrate how contemporary measurement techniques, such as item response theory methods combined with computer adaptive testing methodology, can be applied in rehabilitation to design functional outcome instruments that are comprehensive in scope, accurate, allow for compatibility across instruments, and are sensitive to clinically important change without sacrificing their feasibility. 
Finally, we present some of the pressing challenges that need to be overcome to provide effective dissemination and training assistance to ensure that current and future generations of rehabilitation professionals are familiar with and skilled in the application of contemporary outcomes measurement.}, keywords = {*Disability Evaluation, Activities of Daily Living/classification, Disabled Persons/classification/*rehabilitation, Health Status Indicators, Humans, Outcome Assessment (Health Care)/*methods/standards, Recovery of Function, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Sensitivity and Specificity computerized adaptive testing}, author = {Jette, A. M. and Haley, S. M.} } @article {72, title = {Controlling item exposure and test overlap in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {29}, number = {3}, year = {2005}, pages = {204-217}, abstract = {This article proposes an item exposure control method, which is the extension of the Sympson and Hetter procedure and can provide item exposure control at both the item and test levels. Item exposure rate and test overlap rate are two indices commonly used to track item exposure in computerized adaptive tests. By considering both indices, item exposure can be monitored at both the item and test levels. To control the item exposure rate and test overlap rate simultaneously, the modified procedure attempted to control not only the maximum value but also the variance of item exposure rates. Results indicated that the item exposure rate and test overlap rate could be controlled simultaneously by implementing the modified procedure. Item exposure control was improved and precision of trait estimation decreased when a prespecified maximum test overlap rate was stringent. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Content (Test) computerized adaptive testing}, author = {Chen, S-Y. and Lei, P-W.} } @article {504, title = {Controlling item exposure and test overlap in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {29(2)}, year = {2005}, pages = {204{\textendash}217}, author = {Chen, S.Y. and Lei, P. W.} } @article {2141, title = {Controlling Item Exposure and Test Overlap in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {29}, number = {3}, year = {2005}, pages = {204-217}, abstract = {

This article proposes an item exposure control method, which is the extension of the Sympson and Hetter procedure and can provide item exposure control at both the item and test levels. Item exposure rate and test overlap rate are two indices commonly used to track item exposure in computerized adaptive tests. By considering both indices, item exposure can be monitored at both the item and test levels. To control the item exposure rate and test overlap rate simultaneously, the modified procedure attempted to control not only the maximum value but also the variance of item exposure rates. Results indicated that the item exposure rate and test overlap rate could be controlled simultaneously by implementing the modified procedure. Item exposure control was improved and precision of trait estimation decreased when a prespecified maximum test overlap rate was stringent.

}, doi = {10.1177/0146621604271495}, url = {http://apm.sagepub.com/content/29/3/204.abstract}, author = {Chen, Shu-Ying and Lei, Pui-Wa} } @article {121, title = {Data pooling and analysis to build a preliminary item bank: an example using bowel function in prostate cancer}, journal = {Evaluation and the Health Professions}, volume = {28}, number = {2}, year = {2005}, note = {0163-2787 (Print)Journal Article}, pages = {142-59}, abstract = {Assessing bowel function (BF) in prostate cancer can help determine therapeutic trade-offs. We determined the components of BF commonly assessed in prostate cancer studies as an initial step in creating an item bank for clinical and research application. We analyzed six archived data sets representing 4,246 men with prostate cancer. Thirty-one items from validated instruments were available for analysis. Items were classified into domains (diarrhea, rectal urgency, pain, bleeding, bother/distress, and other) then subjected to conventional psychometric and item response theory (IRT) analyses. Items fit the IRT model if the ratio between observed and expected item variance was between 0.60 and 1.40. Four of 31 items had inadequate fit in at least one analysis. Poorly fitting items included bleeding (2), rectal urgency (1), and bother/distress (1). A fifth item assessing hemorrhoids was poorly correlated with other items. Our analyses supported four related components of BF: diarrhea, rectal urgency, pain, and bother/distress.}, keywords = {*Quality of Life, *Questionnaires, Adult, Aged, Data Collection/methods, Humans, Intestine, Large/*physiopathology, Male, Middle Aged, Prostatic Neoplasms/*physiopathology, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Statistics, Nonparametric}, author = {Eton, D. T. and Lai, J. S. and Cella, D. and Reeve, B. B. and Talcott, J. A. and Clark, J. A. and McPherson, C. P. and Litwin, M. S. and Moinpour, C. M.} } @article {571, title = {Design and evaluation of an XML-based platform-independent computerized adaptive testing system}, journal = {IEEE Transactions on Education}, volume = {48(2)}, year = {2005}, pages = {230-237}, author = {Ho, R.-G., and Yen, Y.-C.} } @article {546, title = {Development of a computer-adaptive test for depression (D-CAT)}, journal = {Quality of Life Research}, volume = {14}, year = {2005}, pages = {2277{\textendash}2291}, author = {Fliege, H. and Becker, J. and Walter, O. B. and Bjorner, J. B. and Klapp, B. F. and Rose, M.} } @inbook {1773, title = {The development of the adaptive item language assessment (AILA) for mixed-ability students}, year = {2005}, address = {Proceedings E-Learn 2005 World Conference on E-Learning in Corporate, Government, Healthcare, and Higher Education, 643-650, Vancouver, Canada, AACE, October 2005.}, author = {Giouroglou, H. and Economides, A. A.} } @article {85, title = {Dynamic assessment of health outcomes: Time to let the CAT out of the bag?}, journal = {Health Services Research}, volume = {40}, number = {5, part2}, year = {2005}, pages = {1694-1711}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {Background: The use of item response theory (IRT) to measure self-reported outcomes has burgeoned in recent years. Perhaps the most important application of IRT is computer-adaptive testing (CAT), a measurement approach in which the selection of items is tailored for each respondent. Objective. 
To provide an introduction to the use of CAT in the measurement of health outcomes, describe several IRT models that can be used as the basis of CAT, and discuss practical issues associated with the use of adaptive scaling in research settings. Principal Points: The development of a CAT requires several steps that are not required in the development of a traditional measure including identification of "starting" and "stopping" rules. CAT{\textquoteright}s most attractive advantage is its efficiency. Greater measurement precision can be achieved with fewer items. Disadvantages of CAT include the high cost and level of technical expertise required to develop a CAT. Conclusions: Researchers, clinicians, and patients benefit from the availability of psychometrically rigorous measures that are not burdensome. CAT outcome measures hold substantial promise in this regard, but their development is not without challenges. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computer adaptive testing, Item Response Theory, self reported health outcomes}, isbn = {0017-9124 (Print); 1475-6773 (Electronic)}, author = {Cook, K. F. and O{\textquoteright}Malley, K. J. and Roddey, T. S.} } @conference {2220, title = {The effectiveness of using multiple item pools in computerized adaptive testing}, booktitle = {Annual meeting of the National Council on Measurement in Education }, year = {2005}, month = {04/2005}, address = {Montreal, Canada}, author = {Zhang, J. and Chang, H.} } @inbook {1738, title = {Features of the estimated sampling distribution of the ability estimate in computerized adaptive testing according to two stopping rules}, year = {2005}, address = {D. G. Englehard (Eds.), Objective measurement: Theory into practice. Volume 6.}, author = {Blais, J-G. and Ra{\^\i}che, G.} } @conference {1285, title = {Identifying practical indices for enhancing item pool security}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education (NCME)}, year = {2005}, address = {Montreal, Canada}, author = {Yi, Q. and Zhang, J. and Chang, Hua-Hua} } @inbook {1772, title = {An implemented theoretical framework for a common European foreign language adaptive assessment}, year = {2005}, address = {Proceedings ICODL 2005, 3rd ternational Conference on Open and Distance Learning {\textquoteright}Applications of Pedagogy and Technology{\textquoteright},339-350,Greek Open University, Patra, Greece}, author = {Giouroglou, H. and Economides, A. A.} } @booklet {1534, title = {Implementing content constraints in alpha-stratified adaptive testing using a shadow test approach}, year = {2005}, address = {Law School Admission Council, Computerized Testing Report 01-09}, author = {van der Linden, W. J. and Chang, Hua-Hua} } @article {253, title = {Increasing the homogeneity of CAT{\textquoteright}s item-exposure rates by minimizing or maximizing varied target functions while assembling shadow tests}, journal = {Journal of Educational Measurement}, volume = {42}, number = {3}, year = {2005}, pages = {245-269}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {A computerized adaptive testing (CAT) algorithm that has the potential to increase the homogeneity of CATs item-exposure rates without significantly sacrificing the precision of ability estimates was proposed and assessed in the shadow-test (van der Linden \& Reese, 1998) CAT context. 
This CAT algorithm was formed by a combination of maximizing or minimizing varied target functions while assembling shadow tests. There were four target functions to be separately used in the first, second, third, and fourth quarter test of CAT. The elements to be used in the four functions were associated with (a) a random number assigned to each item, (b) the absolute difference between an examinee{\textquoteright}s current ability estimate and an item difficulty, (c) the absolute difference between an examinee{\textquoteright}s current ability estimate and an optimum item difficulty, and (d) item information. The results indicated that this combined CAT fully utilized all the items in the pool, reduced the maximum exposure rates, and achieved more homogeneous exposure rates. Moreover, its precision in recovering ability estimates was similar to that of the maximum item-information method. The combined CAT method resulted in the best overall results compared with the other individual CAT item-selection methods. The findings from the combined CAT are encouraging. Future uses are discussed. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {algorithm, computerized adaptive testing, item exposure rate, shadow test, varied target function}, isbn = {0022-0655 (Print)}, author = {Li, Y. H. and Schafer, W. D.} } @article {208, title = {Infeasibility in automated test assembly models: A comparison study of different methods}, journal = {Journal of Educational Measurement}, volume = {42}, number = {3}, year = {2005}, pages = {223-243}, abstract = {Several techniques exist to automatically put together a test meeting a number of specifications. In an item bank, the items are stored with their characteristics. A test is constructed by selecting a set of items that fulfills the specifications set by the test assembler. Test assembly problems are often formulated in terms of a model consisting of restrictions and an objective to be maximized or minimized. A problem arises when it is impossible to construct a test from the item pool that meets all specifications, that is, when the model is not feasible. Several methods exist to handle these infeasibility problems. In this article, test assembly models resulting from two practical testing programs were reconstructed to be infeasible. These models were analyzed using methods that forced a solution (Goal Programming, Multiple-Goal Programming, Greedy Heuristic), that analyzed the causes (Relaxed and Ordered Deletion Algorithm (RODA), Integer Randomized Deletion Algorithm (IRDA), Set Covering (SC), and Item Sampling), or that analyzed the causes and used this information to force a solution (Irreducible Infeasible Set-Solver). Specialized methods such as the IRDA and the Irreducible Infeasible Set-Solver performed best. Recommendations about the use of different methods are given. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Algorithms, Item Content (Test), Models, Test Construction}, author = {Huitzing, H. A. and Veldkamp, B. P. and Verschoor, A. J.} } @article {236, title = {An item bank was created to improve the measurement of cancer-related fatigue}, journal = {Journal of Clinical Epidemiology}, volume = {58}, number = {2}, year = {2005}, note = {Lai, Jin-SheiCella, DavidDineen, KellyBode, RitaVon Roenn, JamieGershon, Richard CShevrin, DanielEnglandJ Clin Epidemiol. 
2005 Feb;58(2):190-7.}, month = {Feb}, pages = {190-7}, type = {Multicenter Study}, edition = {2005/02/01}, abstract = {OBJECTIVE: Cancer-related fatigue (CRF) is one of the most common unrelieved symptoms experienced by patients. CRF is underrecognized and undertreated due to a lack of clinically sensitive instruments that integrate easily into clinics. Modern computerized adaptive testing (CAT) can overcome these obstacles by enabling precise assessment of fatigue without requiring the administration of a large number of questions. A working item bank is essential for development of a CAT platform. The present report describes the building of an operational item bank for use in clinical settings with the ultimate goal of improving CRF identification and treatment. STUDY DESIGN AND SETTING: The sample included 301 cancer patients. Psychometric properties of items were examined by using Rasch analysis, an Item Response Theory (IRT) model. RESULTS AND CONCLUSION: The final bank includes 72 items. These 72 unidimensional items explained 57.5\% of the variance, based on factor analysis results. Excellent internal consistency (alpha=0.99) and acceptable item-total correlation were found (range: 0.51-0.85). The 72 items covered a reasonable range of the fatigue continuum. No significant ceiling effects, floor effects, or gaps were found. A sample short form was created for demonstration purposes. The resulting bank is amenable to the development of a CAT platform.}, keywords = {Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Fatigue/*etiology/psychology, Female, Humans, Male, Middle Aged, Neoplasms/*complications/psychology, Psychometrics, Questionnaires}, isbn = {0895-4356 (Print)0895-4356 (Linking)}, author = {Lai, J-S. and Cella, D. and Dineen, K. and Bode, R. and Von Roenn, J. and Gershon, R. C. and Shevrin, D.} } @article {218, title = {[Item characteristic curve equating under graded response models in IRT]}, journal = {Acta Psychologica Sinica}, volume = {37}, number = {6}, year = {2005}, pages = {832-838}, publisher = {Science Press: China}, abstract = {In one of the largest qualificatory tests--economist test, to guarantee the comparability among different years, construct item bank and prepare for computerized adaptive testing, item characteristic curve equating and anchor test equating design under graded models in IRT are used, which have realized the item and ability parameter equating of test data in five years and succeeded in establishing an item bank. Based on it, cut scores of different years are compared by equating and provide demonstrational gist to constitute the eligibility standard of economist test. }, keywords = {graded response models, item characteristic curve, Item Response Theory}, isbn = {0439-755X (Print)}, author = {Jun, Z. and Dongming, O. and Shuyuan, X. and Haiqi, D. and Shuqing, Q.} } @article {2131, title = {Item response theory in computer adaptive testing: implications for outcomes measurement in rehabilitation}, journal = {Rehabil Psychol}, volume = {50}, year = {2005}, pages = {71-78}, author = {Ware, J. E and Gandek, B. and Sinclair, S. J. and Bjorner, J. B.} } @article {240, title = {An item response theory-based pain item bank can enhance measurement precision}, journal = {Journal of Pain and Symptom Management}, volume = {30}, number = {3}, year = {2005}, note = {0885-3924Journal Article}, pages = {278-88}, abstract = {Cancer-related pain is often under-recognized and undertreated. 
This is partly due to the lack of appropriate assessments, which need to be comprehensive and precise yet easily integrated into clinics. Computerized adaptive testing (CAT) can enable precise-yet-brief assessments by only selecting the most informative items from a calibrated item bank. The purpose of this study was to create such a bank. The sample included 400 cancer patients who were asked to complete 61 pain-related items. Data were analyzed using factor analysis and the Rasch model. The final bank consisted of 43 items which satisfied the measurement requirement of factor analysis and the Rasch model, demonstrated high internal consistency and reasonable item-total correlations, and discriminated patients with differing degrees of pain. We conclude that this bank demonstrates good psychometric properties, is sensitive to pain reported by patients, and can be used as the foundation for a CAT pain-testing platform for use in clinical practice.}, keywords = {computerized adaptive testing}, author = {Lai, J-S. and Dineen, K. and Reeve, B. B. and Von Roenn, J. and Shervin, D. and McGuire, M. and Bode, R. K. and Paice, J. and Cella, D.} } @article {289, title = {La Validez desde una {\'o}ptica psicom{\'e}trica [Validity from a psychometric perspective]}, journal = {Acta Comportamentalia}, volume = {13}, number = {1}, year = {2005}, pages = {9-20}, abstract = {El estudio de la validez constituye el eje central de los an{\'a}lisis psicom{\'e}tricos de los instrumentos de medida. En esta comunicaci{\'o}n se traza una breve nota hist{\'o}rica de los distintos modos de concebir la validez a lo largo de los tiempos, se comentan las l{\'\i}neas actuales, y se tratan de vislumbrar posibles v{\'\i}as futuras, teniendo en cuenta el impacto que las nuevas tecnolog{\'\i}as inform{\'a}ticas est{\'a}n ejerciendo sobre los propios instrumentos de medida en Psicolog{\'\i}a y Educaci{\'o}n. Cuestiones como los nuevos formatos multimedia de los {\'\i}tems, la evaluaci{\'o}n a distancia, el uso intercultural de las pruebas, las consecuencias de su uso, o los tests adaptativos informatizados, reclaman nuevas formas de evaluar y conceptualizar la validez. Tambi{\'e}n se analizan cr{\'\i}ticamente algunos planteamientos recientes sobre el concepto de validez. The study of validity constitutes a central axis of psychometric analyses of measurement instruments. This paper presents a historical sketch of different modes of conceiving validity, with commentary on current views, and it attempts to predict future lines of research by considering the impact of new computerized technologies on measurement instruments in psychology and education. Factors such as the new multimedia format of items, distance assessment, the intercultural use of tests, the consequences of the latter, or the development of computerized adaptive tests demand new ways of conceiving and evaluating validity. Some recent thoughts about the concept of validity are also critically analyzed. 
(PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Factor Analysis, Measurement, Psychometrics, Scaling (Testing), Statistical, Technology, Test Validity}, author = {Mu{\~n}iz, J.} } @article {357, title = {Measuring physical function in patients with complex medical and postsurgical conditions: a computer adaptive approach}, journal = {American Journal of Physical Medicine and Rehabilitation}, volume = {84}, number = {10}, year = {2005}, note = {0894-9115 (Print)Comparative StudyJournal ArticleResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, P.H.S.}, month = {Oct}, pages = {741-8}, abstract = {OBJECTIVE: To examine whether the range of disability in the medically complex and postsurgical populations receiving rehabilitation is adequately sampled by the new Activity Measure--Post-Acute Care (AM-PAC), and to assess whether computer adaptive testing (CAT) can derive valid patient scores using fewer questions. DESIGN: Observational study of 158 subjects (mean age 67.2 yrs) receiving skilled rehabilitation services in inpatient (acute rehabilitation hospitals, skilled nursing facility units) and community (home health services, outpatient departments) settings for recent-onset or worsening disability from medical (excluding neurological) and surgical (excluding orthopedic) conditions. Measures were interviewer-administered activity questions (all patients) and physical functioning portion of the SF-36 (outpatients) and standardized chart items (11 Functional Independence Measure (FIM), 19 Standardized Outcome and Assessment Information Set (OASIS) items, and 22 Minimum Data Set (MDS) items). Rasch modeling analyzed all data and the relationship between person ability estimates and average item difficulty. CAT assessed the ability to derive accurate patient scores using a sample of questions. RESULTS: The 163-item activity item pool covered the range of physical movement and personal and instrumental activities. CAT analysis showed comparable scores between estimates using 10 items or the total item pool. CONCLUSION: The AM-PAC can assess a broad range of function in patients with complex medical illness. CAT achieves valid patient scores using fewer questions.}, keywords = {Activities of Daily Living/*classification, Adult, Aged, Cohort Studies, Continuity of Patient Care, Disability Evaluation, Female, Health Services Research, Humans, Male, Middle Aged, Postoperative Care/*rehabilitation, Prognosis, Recovery of Function, Rehabilitation Centers, Rehabilitation/*standards, Sensitivity and Specificity, Sickness Impact Profile, Treatment Outcome}, author = {Siebens, H. and Andres, P. L. and Pengsheng, N. and Coster, W. J. and Haley, S. M.} } @article {2142, title = {Monte Carlo Test Assembly for Item Pool Analysis and Extension}, journal = {Applied Psychological Measurement}, volume = {29}, number = {4}, year = {2005}, pages = {239-261}, abstract = {

A new test assembly algorithm based on a Monte Carlo random search is presented in this article. A major advantage of the Monte Carlo test assembly over other approaches (integer programming or enumerative heuristics) is that it performs a uniform sampling from the item pool, which provides every feasible item combination (test) with an equal chance of being built during an assembly. This allows the authors to address the following issues of pool analysis and extension: compare the strengths and weaknesses of different pools, identify the most restrictive constraint(s) for test assembly, and identify properties of the items that should be added to a pool to achieve greater usability of the pool. Computer experiments with operational pools are given.
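For readers who want a concrete picture of the uniform-sampling idea summarized above, here is a minimal Python sketch (not the authors{\textquoteright} algorithm; the pool, test length, and the single content constraint are hypothetical): a candidate test is drawn uniformly at random from the item pool and kept only if it satisfies every constraint, so each feasible item combination has the same chance of being generated.

import random

def monte_carlo_assembly(pool, test_len, constraints, n_trials=10_000):
    """Return every sampled item combination that satisfies all constraints."""
    feasible = []
    for _ in range(n_trials):
        candidate = random.sample(pool, test_len)           # uniform draw from the pool
        if all(check(candidate) for check in constraints):  # keep only feasible tests
            feasible.append(candidate)
    return feasible

# Hypothetical 100-item pool with one content constraint:
# every assembled test must contain at least 3 "geometry" items.
pool = [{"id": i, "domain": "geometry" if i % 4 == 0 else "algebra"} for i in range(100)]
constraints = [lambda test: sum(it["domain"] == "geometry" for it in test) >= 3]
tests = monte_carlo_assembly(pool, test_len=20, constraints=constraints, n_trials=500)

In an operational setting the trial count and the constraint set would of course come from the test specifications rather than being invented as here.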

}, doi = {10.1177/0146621605275413}, url = {http://apm.sagepub.com/content/29/4/239.abstract}, author = {Belov, Dmitry I. and Armstrong, Ronald D.} } @inbook {1761, title = {Personalized feedback in CAT}, year = {2005}, address = {WSEAS Transactions on Advances in Engineering Education, Issue 3, Volume 2,174-181, July 2005.}, author = {Economides, A. A.} } @article {142, title = {The promise of PROMIS: using item response theory to improve assessment of patient-reported outcomes}, journal = {Clinical and Experimental Rheumatology}, volume = {23}, number = {5 Suppl 39}, year = {2005}, pages = {S53-7}, abstract = {PROMIS (Patient-Reported-Outcomes Measurement Information System) is an NIH Roadmap network project intended to improve the reliability, validity, and precision of PROs and to provide definitive new instruments that will exceed the capabilities of classic instruments and enable improved outcome measurement for clinical research across all NIH institutes. Item response theory (IRT) measurement models now permit us to transition conventional health status assessment into an era of item banking and computerized adaptive testing (CAT). Item banking uses IRT measurement models and methods to develop item banks from large pools of items from many available questionnaires. IRT allows the reduction and improvement of items and assembles domains of items which are unidimensional and not excessively redundant. CAT provides a model-driven algorithm and software to iteratively select the most informative remaining item in a domain until a desired degree of precision is obtained. Through these approaches the number of patients required for a clinical trial may be reduced while holding statistical power constant. PROMIS tools, expected to improve precision and enable assessment at the individual patient level which should broaden the appeal of PROs, will begin to be available to the general medical community in 2008.}, keywords = {computerized adaptive testing}, author = {Fries, J.F. and Bruce, B. and Cella, D.} } @article {4, title = {Propiedades psicom{\'e}tricas de un test Adaptativo Informatizado para la medici{\'o}n del ajuste emocional [Psychometric properties of an Emotional Adjustment Computerized Adaptive Test]}, journal = {Psicothema}, volume = {17}, number = {3}, year = {2005}, pages = {484-491}, abstract = {En el presente trabajo se describen las propiedades psicom{\'e}tricas de un Test Adaptativo Informatizado para la medici{\'o}n del ajuste emocional de las personas. La revisi{\'o}n de la literatura acerca de la aplicaci{\'o}n de los modelos de la teor{\'\i}a de la respuesta a los {\'\i}tems (TRI) muestra que {\'e}sta se ha utilizado m{\'a}s en el trabajo con variables aptitudinales que para la medici{\'o}n de variables de personalidad, sin embargo diversos estudios han mostrado la eficacia de la TRI para la descripci{\'o}n psicom{\'e}trica de dichasvariables. Aun as{\'\i}, pocos trabajos han explorado las caracter{\'\i}sticas de un Test Adaptativo Informatizado, basado en la TRI, para la medici{\'o}n de una variable de personalidad como es el ajuste emocional. Nuestros resultados muestran la eficiencia del TAI para la evaluaci{\'o}n del ajuste emocional, proporcionando una medici{\'o}n v{\'a}lida y precisa, utilizando menor n{\'u}mero de elementos de medida encomparaci{\'o}n con las escalas de ajuste emocional de instrumentos fuertemente implantados. Psychometric properties of an emotional adjustment computerized adaptive test. 
The present work describes the psychometric properties of a computerized adaptive test for measuring emotional adjustment. A review of the Item Response Theory (IRT) literature indicates that IRT has mainly been used for assessing achievement and ability rather than personality factors, although in recent years several studies have successfully applied IRT to personality assessment instruments. Even so, few studies have examined the characteristics of an IRT-based computerized adaptive test for measuring a personality trait such as emotional adjustment. Our results show that the CAT measures emotional adjustment efficiently, providing a valid and precise measurement with fewer items than the emotional adjustment scales of well-established questionnaires.}, keywords = {Computer Assisted Testing, Emotional Adjustment, Item Response, Personality Measures, Psychometrics, Test Validity, Theory}, author = {Aguado, D. and Rubio, V. J. and Hontangas, P. M. and Hern{\'a}ndez, J. M.} } @article {198, title = {A randomized experiment to compare conventional, computerized, and computerized adaptive administration of ordinal polytomous attitude items}, journal = {Applied Psychological Measurement}, volume = {29}, number = {3}, year = {2005}, pages = {159-183}, abstract = {A total of 520 high school students were randomly assigned to a paper-and-pencil test (PPT), a computerized standard test (CST), or a computerized adaptive test (CAT) version of the Dutch School Attitude Questionnaire (SAQ), consisting of ordinal polytomous items. The CST administered items in the same order as the PPT. The CAT administered all items of three SAQ subscales in adaptive order using Samejima{\textquoteright}s graded response model, so that six different stopping rule settings could be applied afterwards. School marks were used as external criteria. Results showed significant but small multivariate administration mode effects on conventional raw scores and small to medium effects on maximum likelihood latent trait estimates. When the precision of CAT latent trait estimates decreased, correlations with grade point average in general decreased. However, the magnitude of the decrease was not very large as compared to the PPT, the CST, and the CAT without the stopping rule. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Computer Assisted Testing, Test Administration, Test Items}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @article {2140, title = {A Randomized Experiment to Compare Conventional, Computerized, and Computerized Adaptive Administration of Ordinal Polytomous Attitude Items}, journal = {Applied Psychological Measurement}, volume = {29}, number = {3}, year = {2005}, pages = {159-183}, abstract = {

A total of 520 high school students were randomly assigned to a paper-and-pencil test (PPT), a computerized standard test (CST), or a computerized adaptive test (CAT) version of the Dutch School Attitude Questionnaire (SAQ), consisting of ordinal polytomous items. The CST administered items in the same order as the PPT. The CAT administered all items of three SAQ subscales in adaptive order using Samejima{\textquoteright}s graded response model, so that six different stopping rule settings could be applied afterwards. School marks were used as external criteria. Results showed significant but small multivariate administration mode effects on conventional raw scores and small to medium effects on maximum likelihood latent trait estimates. When the precision of CAT latent trait estimates decreased, correlations with grade point average in general decreased. However, the magnitude of the decrease was not very large as compared to the PPT, the CST, and the CAT without the stopping rule.
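As a rough illustration of the graded-response-model machinery behind such a CAT (hypothetical item parameters; not the study{\textquoteright}s implementation or its estimation method), the sketch below computes Samejima item information at a provisional trait estimate and applies the kind of standard-error stopping rule that different stopping-rule settings would vary.

import numpy as np

def grm_category_probs(a, b_thresholds, theta):
    """Samejima graded response model: probabilities of each ordered category."""
    # Cumulative probabilities P*(X >= k), padded with 1 (lowest) and 0 (above highest).
    p_star = 1.0 / (1.0 + np.exp(-a * (theta - np.asarray(b_thresholds))))
    p_star = np.concatenate(([1.0], p_star, [0.0]))
    return p_star[:-1] - p_star[1:]           # P(X = k) for k = 0..m-1

def grm_item_information(a, b_thresholds, theta):
    """Fisher information of one graded-response item at theta."""
    p_star = 1.0 / (1.0 + np.exp(-a * (theta - np.asarray(b_thresholds))))
    d_star = a * p_star * (1.0 - p_star)       # derivatives of the cumulative probs
    d_star = np.concatenate(([0.0], d_star, [0.0]))
    probs = grm_category_probs(a, b_thresholds, theta)
    d_cat = d_star[:-1] - d_star[1:]           # derivatives of the category probabilities
    return float(np.sum(d_cat ** 2 / probs))

def stop_test(administered_items, theta_hat, se_target=0.35):
    """Standard-error stopping rule: stop once SE(theta) falls below the target."""
    info = sum(grm_item_information(a, b, theta_hat) for a, b in administered_items)
    return info > 0 and 1.0 / np.sqrt(info) < se_target

# Hypothetical 5-category items: (discrimination a, ordered thresholds b1 < b2 < b3 < b4).
items = [(1.4, [-1.5, -0.5, 0.5, 1.5]), (1.1, [-1.0, 0.0, 0.8, 1.6])]
done = stop_test(items, theta_hat=0.2)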

}, doi = {10.1177/0146621604271268}, url = {http://apm.sagepub.com/content/29/3/159.abstract}, author = {Hol, A. Michiel and Vorst, Harrie C. M. and Mellenbergh, Gideon J.} } @article {304, title = {Recent trends in comparability studies}, number = {05-05}, year = {2005}, month = {August, 2005}, institution = {Pearson}, keywords = {computer adaptive testing, Computerized assessment, differential item functioning, Mode effects}, isbn = {05-05}, author = {Paek, P.} } @conference {2135, title = {Rescuing CAT by fixing the problems}, booktitle = {National Council on Measurement in Education}, year = {2005}, address = {Montreal, Canada}, author = {Chang, S-H. and Zhang, J.} } @article {185, title = {Simulated computerized adaptive tests for measuring functional status were efficient with good discriminant validity in patients with hip, knee, or foot/ankle impairments}, journal = {Journal of Clinical Epidemiology}, volume = {58}, number = {6}, year = {2005}, note = {0895-4356 (Print)Journal ArticleMulticenter StudyValidation Studies}, pages = {629-38}, abstract = {BACKGROUND AND OBJECTIVE: To develop computerized adaptive tests (CATs) designed to assess lower extremity functional status (FS) in people with lower extremity impairments using items from the Lower Extremity Functional Scale and compare discriminant validity of FS measures generated using all items analyzed with a rating scale Item Response Theory model (theta(IRT)) and measures generated using the simulated CATs (theta(CAT)). METHODS: Secondary analysis of retrospective intake rehabilitation data. RESULTS: Unidimensionality of items was strong, and local independence of items was adequate. Differential item functioning (DIF) affected item calibration related to body part, that is, hip, knee, or foot/ankle, but DIF did not affect item calibration for symptom acuity, gender, age, or surgical history. Therefore, patients were separated into three body part specific groups. The rating scale model fit all three data sets well. Three body part specific CATs were developed: each was 70\% more efficient than using all LEFS items to estimate FS measures. theta(IRT) and theta(CAT) measures discriminated patients by symptom acuity, age, and surgical history in similar ways. theta(CAT) measures were as precise as theta(IRT) measures. CONCLUSION: Body part-specific simulated CATs were efficient and produced precise measures of FS with good discriminant validity.}, keywords = {*Health Status Indicators, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Ankle Joint/physiopathology, Diagnosis, Computer-Assisted/*methods, Female, Hip Joint/physiopathology, Humans, Joint Diseases/physiopathology/*rehabilitation, Knee Joint/physiopathology, Lower Extremity/*physiopathology, Male, Middle Aged, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Retrospective Studies}, author = {Hart, D. L. and Mioduski, J. E. and Stratford, P. W.} } @article {282, title = {Somministrazione di test computerizzati di tipo adattivo: Un{\textquoteright} applicazione del modello di misurazione di Rasch [Administration of computerized and adaptive tests: An application of the Rasch Model]}, journal = {Testing Psicometria Metodologia}, volume = {12}, number = {3}, year = {2005}, pages = {131-149}, abstract = {The aim of the present study is to describe the characteristics of a procedure for administering computerized and adaptive tests (Computer Adaptive Testing or CAT). 
Items to be asked to the individuals are interactively chosen and are selected from a "bank" in which they were previously calibrated and recorded on the basis of their difficulty level. The selection of items is performed by increasingly more accurate estimates of the examinees{\textquoteright} ability. The building of an item-bank on Psychometrics and the implementation of this procedure allow a first validation through Monte Carlo simulations. (PsycINFO Database Record (c) 2006 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Response Theory computerized adaptive testing, Models, Psychometrics}, author = {Miceli, R. and Molinengo, G.} } @booklet {1358, title = {Strategies for controlling item exposure in computerized adaptive testing with the partial credit model}, year = {2005}, address = {Pearson Educational Measurement Research Report 05-01}, author = {Davis, L. L. and Dodd, B.} } @article {195, title = {Test construction for cognitive diagnosis}, journal = {Applied Psychological Measurement}, volume = {29}, number = {4}, year = {2005}, pages = {262-277}, abstract = {Although cognitive diagnostic models (CDMs) can be useful in the analysis and interpretation of existing tests, little has been developed to specify how one might construct a good test using aspects of the CDMs. This article discusses the derivation of a general CDM index based on Kullback-Leibler information that will serve as a measure of how informative an item is for the classification of examinees. The effectiveness of the index is examined for items calibrated using the deterministic input noisy "and" gate model (DINA) and the reparameterized unified model (RUM) by implementing a simple heuristic to construct a test from an item bank. When compared to randomly constructed tests from the same item bank, the heuristic shows significant improvement in classification rates. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {(Measurement), Cognitive Assessment, Item Analysis (Statistical), Profiles, Test Construction, Test Interpretation, Test Items}, author = {Henson, R. K. and Douglas, J.} } @article {219, title = {Toward efficient and comprehensive measurement of the alcohol problems continuum in college students: The Brief Young Adult Alcohol Consequences Questionnaire}, journal = {Alcoholism: Clinical \& Experimental Research}, volume = {29}, number = {7}, year = {2005}, note = {MiscellaneousArticleMiscellaneous Article}, pages = {1180-1189}, abstract = {Background: Although a number of measures of alcohol problems in college students have been studied, the psychometric development and validation of these scales have been limited, for the most part, to methods based on classical test theory. In this study, we conducted analyses based on item response theory to select a set of items for measuring the alcohol problem severity continuum in college students that balances comprehensiveness and efficiency and is free from significant gender bias., Method: We conducted Rasch model analyses of responses to the 48-item Young Adult Alcohol Consequences Questionnaire by 164 male and 176 female college students who drank on at least a weekly basis. 
An iterative process using item fit statistics, item severities, item discrimination parameters, model residuals, and analysis of differential item functioning by gender was used to pare the items down to those that best fit a Rasch model and that were most efficient in discriminating among levels of alcohol problems in the sample., Results: The process of iterative Rasch model analyses resulted in a final 24-item scale with the data fitting the unidimensional Rasch model very well. The scale showed excellent distributional properties, had items adequately matched to the severity of alcohol problems in the sample, covered a full range of problem severity, and appeared highly efficient in retaining all of the meaningful variance captured by the original set of 48 items., Conclusions: The use of Rasch model analyses to inform item selection produced a final scale that, in both its comprehensiveness and its efficiency, should be a useful tool for researchers studying alcohol problems in college students. To aid interpretation of raw scores, examples of the types of alcohol problems that are likely to be experienced across a range of selected scores are provided., (C)2005Research Society on AlcoholismAn important, sometimes controversial feature of all psychological phenomena is whether they are categorical or dimensional. A conceptual and psychometric framework is described for distinguishing whether the latent structure behind manifest categories (e.g., psychiatric diagnoses, attitude groups, or stages of development) is category-like or dimension-like. Being dimension-like requires (a) within-category heterogeneity and (b) between-category quantitative differences. Being category-like requires (a) within-category homogeneity and (b) between-category qualitative differences. The relation between this classification and abrupt versus smooth differences is discussed. Hybrid structures are possible. Being category-like is itself a matter of degree; the authors offer a formalized framework to determine this degree. Empirical applications to personality disorders, attitudes toward capital punishment, and stages of cognitive development illustrate the approach., (C) 2005 by the American Psychological AssociationThe authors conducted Rasch model ( G. Rasch, 1960) analyses of items from the Young Adult Alcohol Problems Screening Test (YAAPST; S. C. Hurlbut \& K. J. Sher, 1992) to examine the relative severity and ordering of alcohol problems in 806 college students. Items appeared to measure a single dimension of alcohol problem severity, covering a broad range of the latent continuum. Items fit the Rasch model well, with less severe symptoms reliably preceding more severe symptoms in a potential progression toward increasing levels of problem severity. However, certain items did not index problem severity consistently across demographic subgroups. A shortened, alternative version of the YAAPST is proposed, and a norm table is provided that allows for a linking of total YAAPST scores to expected symptom expression., (C) 2004 by the American Psychological AssociationA didactic on latent growth curve modeling for ordinal outcomes is presented. The conceptual aspects of modeling growth with ordinal variables and the notion of threshold invariance are illustrated graphically using a hypothetical example. 
The ordinal growth model is described in terms of 3 nested models: (a) multivariate normality of the underlying continuous latent variables (yt) and its relationship with the observed ordinal response pattern (Yt), (b) threshold invariance over time, and (c) growth model for the continuous latent variable on a common scale. Algebraic implications of the model restrictions are derived, and practical aspects of fitting ordinal growth models are discussed with the help of an empirical example and Mx script ( M. C. Neale, S. M. Boker, G. Xie, \& H. H. Maes, 1999). The necessary conditions for the identification of growth models with ordinal data and the methodological implications of the model of threshold invariance are discussed., (C) 2004 by the American Psychological AssociationRecent research points toward the viability of conceptualizing alcohol problems as arrayed along a continuum. Nevertheless, modern statistical techniques designed to scale multiple problems along a continuum (latent trait modeling; LTM) have rarely been applied to alcohol problems. This study applies LTM methods to data on 110 problems reported during in-person interviews of 1,348 middle-aged men (mean age = 43) from the general population. The results revealed a continuum of severity linking the 110 problems, ranging from heavy and abusive drinking, through tolerance and withdrawal, to serious complications of alcoholism. These results indicate that alcohol problems can be arrayed along a dimension of severity and emphasize the relevance of LTM to informing the conceptualization and assessment of alcohol problems., (C) 2004 by the American Psychological AssociationItem response theory (IRT) is supplanting classical test theory as the basis for measures development. This study demonstrated the utility of IRT for evaluating DSM-IV diagnostic criteria. Data on alcohol, cannabis, and cocaine symptoms from 372 adult clinical participants interviewed with the Composite International Diagnostic Interview-Expanded Substance Abuse Module (CIDI-SAM) were analyzed with Mplus ( B. Muthen \& L. Muthen, 1998) and MULTILOG ( D. Thissen, 1991) software. Tolerance and legal problems criteria were dropped because of poor fit with a unidimensional model. Item response curves, test information curves, and testing of variously constrained models suggested that DSM-IV criteria in the CIDI-SAM discriminate between only impaired and less impaired cases and may not be useful to scale case severity. IRT can be used to study the construct validity of DSM-IV diagnoses and to identify diagnostic criteria with poor performance., (C) 2004 by the American Psychological AssociationThis study examined the psychometric characteristics of an index of substance use involvement using item response theory. The sample consisted of 292 men and 140 women who qualified for a Diagnostic and Statistical Manual of Mental Disorders (3rd ed., rev.; American Psychiatric Association, 1987) substance use disorder (SUD) diagnosis and 293 men and 445 women who did not qualify for a SUD diagnosis. The results indicated that men had a higher probability of endorsing substance use compared with women. The index significantly predicted health, psychiatric, and psychosocial disturbances as well as level of substance use behavior and severity of SUD after a 2-year follow-up. 
Finally, this index is a reliable and useful prognostic indicator of the risk for SUD and the medical and psychosocial sequelae of drug consumption., (C) 2002 by the American Psychological AssociationComparability, validity, and impact of loss of information of a computerized adaptive administration of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2) were assessed in a sample of 140 Veterans Affairs hospital patients. The countdown method ( Butcher, Keller, \& Bacon, 1985) was used to adaptively administer Scales L (Lie) and F (Frequency), the 10 clinical scales, and the 15 content scales. Participants completed the MMPI-2 twice, in 1 of 2 conditions: computerized conventional test-retest, or computerized conventional-computerized adaptive. Mean profiles and test-retest correlations across modalities were comparable. Correlations between MMPI-2 scales and criterion measures supported the validity of the countdown method, although some attenuation of validity was suggested for certain health-related items. Loss of information incurred with this mode of adaptive testing has minimal impact on test validity. Item and time savings were substantial., (C) 1999 by the American Psychological Association}, keywords = {Psychometrics, Substance-Related Disorders}, author = {Kahler, C. W. and Strong, D. R. and Read, J. P. and De Boeck, P. and Wilson, M. and Acton, G. S. and Palfai, T. P. and Wood, M. D. and Mehta, P. D. and Neale, M. C. and Flay, B. R. and Conklin, C. A. and Clayton, R. R. and Tiffany, S. T. and Shiffman, S. and Krueger, R. F. and Nichol, P. E. and Hicks, B. M. and Markon, K. E. and Patrick, C. J. and Iacono, William G. and McGue, Matt and Langenbucher, J. W. and Labouvie, E. and Martin, C. S. and Sanjuan, P. M. and Bavly, L. and Kirisci, L. and Chung, T. and Vanyukov, M. and Dunn, M. and Tarter, R. and Handel, R. W. and Ben-Porath, Y. S. and Watt, M.} } @article {252, title = {Trait parameter recovery using multidimensional computerized adaptive testing in reading and mathematics}, journal = {Applied Psychological Measurement}, volume = {29}, number = {1}, year = {2005}, pages = {3-25}, abstract = {Under a multidimensional item response theory (MIRT) computerized adaptive testing (CAT) testing scenario, a trait estimate (θ) in onedimension will provide clues for subsequentlyseeking a solution in other dimensions. Thisfeature may enhance the efficiency of MIRT CAT{\textquoteright}s item selection and its scoring algorithms compared with its counterpart, the unidimensional CAT (UCAT). The present study used existing Reading and Math test data to generate simulated item parameters. A confirmatory item factor analysis model was applied to the data using NOHARM to produce interpretable MIRT item parameters. Results showed that MIRT CAT, conditional on theconstraints, was quite capable of producing accurate estimates on both measures. Compared with UCAT, MIRT CAT slightly increased the accuracy of both trait estimates, especially for the low-level or high-level trait examinees in both measures, and reduced the rate of unused items in the item pool. Index terms: computerized adaptive testing (CAT), item response theory (IRT), dimensionality, 0-1 linear programming, constraints, item exposure, reading assessment, mathematics assessment. }, isbn = {0146-6216}, author = {Li, Y. 
H.} } @article {2139, title = {Trait Parameter Recovery Using Multidimensional Computerized Adaptive Testing in Reading and Mathematics}, journal = {Applied Psychological Measurement}, volume = {29}, number = {1}, year = {2005}, pages = {3-25}, abstract = {

Under a multidimensional item response theory (MIRT) computerized adaptive testing (CAT) testing scenario, a trait estimate (θ) in one dimension will provide clues for subsequently seeking a solution in other dimensions. This feature may enhance the efficiency of MIRT CAT{\textquoteright}s item selection and its scoring algorithms compared with its counterpart, the unidimensional CAT (UCAT). The present study used existing Reading and Math test data to generate simulated item parameters. A confirmatory item factor analysis model was applied to the data using NOHARM to produce interpretable MIRT item parameters. Results showed that MIRT CAT, conditional on the constraints, was quite capable of producing accurate estimates on both measures. Compared with UCAT, MIRT CAT slightly increased the accuracy of both trait estimates, especially for the low-level or high-level trait examinees in both measures, and reduced the rate of unused items in the item pool.
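A minimal sketch of one common MIRT CAT selection rule (assumed compensatory two-dimensional 2PL parameters; not the study{\textquoteright}s NOHARM-calibrated model or its exact algorithm): each item contributes Fisher information P(1 - P) a a^T at the current trait estimate, and the next item is the one that most increases the determinant of the accumulated information matrix.

import numpy as np

def prob_m2pl(a, d, theta):
    """Compensatory multidimensional 2PL response probability."""
    return 1.0 / (1.0 + np.exp(-(a @ theta + d)))

def item_information(a, d, theta):
    """Fisher information matrix contributed by one item at theta."""
    p = prob_m2pl(a, d, theta)
    return p * (1.0 - p) * np.outer(a, a)

def select_next_item(bank, administered, theta, info_so_far):
    """D-optimal rule: maximize the determinant of the updated information matrix."""
    best_idx, best_det = None, -np.inf
    for idx, (a, d) in enumerate(bank):
        if idx in administered:
            continue
        det = np.linalg.det(info_so_far + item_information(a, d, theta))
        if det > best_det:
            best_idx, best_det = idx, det
    return best_idx

# Hypothetical two-dimensional bank: (discrimination vector a, intercept d) per item.
rng = np.random.default_rng(0)
bank = [(rng.uniform(0.5, 2.0, size=2), rng.normal()) for _ in range(50)]
next_item = select_next_item(bank, administered=set(), theta=np.zeros(2),
                             info_so_far=0.1 * np.eye(2))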

}, doi = {10.1177/0146621604270667}, url = {http://apm.sagepub.com/content/29/1/3.abstract}, author = {Li, Yuan H. and Schafer, William D.} } @article {281, title = {The use of person-fit statistics in computerized adaptive testing}, year = {2005}, month = {September, 2005}, institution = {Law School Administration Council}, address = {Newton, PA. USA}, isbn = {Computerized Testing Report 97-14}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {701, title = {Validation of a computerized adaptive testing version of the Schedule for Nonadaptive and Adaptive Personality (SNAP)}, journal = {Psychological Assessment}, volume = {17(1)}, year = {2005}, pages = {28-43}, abstract = {This is a validation study of a computerized adaptive (CAT) version of the Schedule for Nonadaptive and Adaptive Personality (SNAP) conducted with 413 undergraduates who completed the SNAP twice, 1 week apart. Participants were assigned randomly to 1 of 4 retest groups: (a) paper-and-pencil (P\&P) SNAP, (b) CAT, (c) P\&P/CAT, and (d) CAT/P\&P. With number of items held constant, computerized administration had little effect on descriptive statistics, rank ordering of scores, reliability, and concurrent validity, but was preferred over P\&P administration by most participants. CAT administration yielded somewhat lower precision and validity than P\&P administration, but required 36\% to 37\% fewer items and 58\% to 60\% less time to complete. These results confirm not only key findings from previous CAT simulation studies of personality measures but extend them for the 1st time to a live assessment setting.}, author = {Simms, L. J., and Clark, L. A.} } @article {360, title = {Validation of a computerized adaptive version of the Schedule of Non-Adaptive and Adaptive Personality (SNAP)}, journal = {Psychological Assessment}, volume = {17}, number = {1}, year = {2005}, pages = {28-43}, abstract = { This is a validation study of a computerized adaptive (CAT) version of the Schedule for Nonadaptive and Adaptive Personality (SNAP) conducted with 413 undergraduates who completed the SNAP twice, 1 week apart. Participants were assigned randomly to 1 of 4 retest groups: (a) paper-and-pencil (P\&P) SNAP, (b) CAT, (c) P\&P/CAT, and (d) CAT/P\&P. With number of items held constant, computerized administration had little effect on descriptive statistics, rank ordering of scores, reliability, and concurrent validity, but was preferred over P\&P administration by most participants. CAT administration yielded somewhat lower precision and validity than P\&P administration, but required 36\% to 37\% fewer items and 58\% to 60\% less time to complete. These results confirm not only key findings from previous CAT simulation studies of personality measures but extend them for the 1st time to a live assessment setting. }, author = {Simms, L. J. and Clark, L.J.} } @inbook {1770, title = {The ABCs of Computerized Adaptive Testing}, year = {2004}, address = {T. M. Wood and W. Zhi (Eds.), Measurement issues and practice in physical activity. Champaign, IL: Human kinetics.}, author = {Gershon, R. C.} } @conference {1246, title = {Achieving accuracy of retest calibration for a national CAT placement examination with a restricted test length}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {$\#$WA04-01 {PDF file, 837 KB}}, address = {San Diego CA}, author = {Wang, X. B. 
and Wiley, A.} } @article {168, title = {Activity outcome measurement for postacute care}, journal = {Medical Care}, volume = {42}, number = {1 Suppl}, year = {2004}, note = {0025-7079Journal ArticleMulticenter Study}, pages = {I49-161}, abstract = {BACKGROUND: Efforts to evaluate the effectiveness of a broad range of postacute care services have been hindered by the lack of conceptually sound and comprehensive measures of outcomes. It is critical to determine a common underlying structure before employing current methods of item equating across outcome instruments for future item banking and computer-adaptive testing applications. OBJECTIVE: To investigate the factor structure, reliability, and scale properties of items underlying the Activity domains of the International Classification of Functioning, Disability and Health (ICF) for use in postacute care outcome measurement. METHODS: We developed a 41-item Activity Measure for Postacute Care (AM-PAC) that assessed an individual{\textquoteright}s execution of discrete daily tasks in his or her own environment across major content domains as defined by the ICF. We evaluated the reliability and discriminant validity of the prototype AM-PAC in 477 individuals in active rehabilitation programs across 4 rehabilitation settings using factor analyses, tests of item scaling, internal consistency reliability analyses, Rasch item response theory modeling, residual component analysis, and modified parallel analysis. RESULTS: Results from an initial exploratory factor analysis produced 3 distinct, interpretable factors that accounted for 72\% of the variance: Applied Cognition (44\%), Personal Care \& Instrumental Activities (19\%), and Physical \& Movement Activities (9\%); these 3 activity factors were verified by a confirmatory factor analysis. Scaling assumptions were met for each factor in the total sample and across diagnostic groups. Internal consistency reliability was high for the total sample (Cronbach alpha = 0.92 to 0.94), and for specific diagnostic groups (Cronbach alpha = 0.90 to 0.95). Rasch scaling, residual factor, differential item functioning, and modified parallel analyses supported the unidimensionality and goodness of fit of each unique activity domain. CONCLUSIONS: This 3-factor model of the AM-PAC can form the conceptual basis for common-item equating and computer-adaptive applications, leading to a comprehensive system of outcome instruments for postacute care settings.}, keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aftercare/*standards/statistics \& numerical data, Aged, Boston, Cognition/physiology, Disability Evaluation, Factor Analysis, Statistical, Female, Human, Male, Middle Aged, Movement/physiology, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Psychometrics, Questionnaires/standards, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Support, U.S. Gov{\textquoteright}t, P.H.S.}, author = {Haley, S. M. and Coster, W. J. and Andres, P. L. and Ludlow, L. H. and Ni, P. and Bond, T. L. and Sinclair, S. J. and Jette, A. M.} } @inbook {322, title = {Adaptive computerized educational systems: A case study}, booktitle = {Evidence-based educational methods}, series = {Educational Psychology Series}, year = {2004}, note = {Using Smart Source ParsingEvidence-based educational methods. 
A volume in the educational psychology series. (pp. 143-170). San Diego, CA : Elsevier Academic Press, [URL:http://www.academicpress.com]. xxiv, 382 pp}, pages = {143-169}, publisher = {Elsevier Academic Press}, organization = {Elsevier Academic Press}, chapter = {10}, address = {San Diego, CA. USA}, abstract = {(Created by APA) Adaptive instruction describes adjustments typical of one-on-one tutoring as discussed in the college tutorial scenario. So computerized adaptive instruction refers to the use of computer software--almost always incorporating artificially intelligent services--which has been designed to adjust both the presentation of information and the form of questioning to meet the current needs of an individual learner. This chapter describes a system for Internet-delivered adaptive instruction. The author attempts to demonstrate a sharp difference between the teaching that takes place outside of the classroom in universities and the kind that is at least afforded, if not taken advantage of by many, students in a more personalized educational setting such as those in the small liberal arts colleges. The author describes a computer-based technology that allows that gap to be bridged with the advantage of at least having more highly prepared learners sitting in college classrooms. A limited range of emerging research that supports that proposition is cited. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Artificial, Computer Assisted Instruction, Computer Software, Higher Education, Individualized, Instruction, Intelligence, Internet, Undergraduate Education}, author = {Ray, R. D.}, editor = {R. W. Malott} } @article {605, title = {Adaptive exploration of user knowledge in computer based testing}, journal = {WSEAS Transactions on Communications}, volume = {3 (1)}, year = {2004}, pages = {322-327}, author = {Lamboudis, D. and Economides, A. A.} } @article {2238, title = {Adaptive Testing With Regression Trees in the Presence of Multidimensionality}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {3}, year = {2004}, pages = {293-316}, abstract = {

It is unrealistic to suppose that standard item response theory (IRT) models will be appropriate for all the new and currently considered computer-based tests. In addition to developing new models, we also need to give attention to the possibility of constructing and analyzing new tests without the aid of strong models. Computerized adaptive testing currently relies heavily on IRT. Alternative, empirically based, nonparametric adaptive testing algorithms exist, but their properties are little known. This article introduces a nonparametric, tree-based algorithm for adaptive testing and shows that it may be superior to conventional, IRT-based adaptive testing in cases where the IRT assumptions are not satisfied. In particular, it shows that the tree-based approach clearly outperformed (one-dimensional) IRT when the pool was strongly two-dimensional.
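To make the tree-based routing concrete, here is a small hypothetical sketch (the article builds its trees empirically from response data; the three-item tree below is invented): each internal node administers one item and branches on the scored response, and the leaf reached supplies the reported score.

from dataclasses import dataclass
from typing import Callable, Union

@dataclass
class Node:
    item_id: int
    if_wrong: Union["Node", float]  # subtree to route to, or a leaf score
    if_right: Union["Node", float]

def administer(tree: Union[Node, float], answer: Callable[[int], bool]) -> float:
    """Walk the routing tree, branching on each scored response; return the leaf score."""
    node = tree
    while isinstance(node, Node):
        node = node.if_right if answer(node.item_id) else node.if_wrong
    return node

# Hypothetical three-item tree: item 5 first, then an easier or harder follow-up.
tree = Node(5,
            if_wrong=Node(2, if_wrong=-1.5, if_right=-0.5),
            if_right=Node(8, if_wrong=0.5, if_right=1.5))
score = administer(tree, answer=lambda item_id: item_id < 7)  # toy response rule -> 0.5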

}, doi = {10.3102/10769986029003293}, url = {http://jeb.sagepub.com/cgi/content/abstract/29/3/293}, author = {Yan, Duanli and Lewis, Charles and Stocking, Martha} } @booklet {201, title = {The AMC Linear Disability Score project in a population requiring residential care: psychometric properties}, journal = {Health and Quality of Life Outcomes}, volume = {2}, year = {2004}, note = {Holman, RebeccaLindeboom, RobertVermeulen, Marinusde Haan, Rob JResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2004 Aug 3;2:42.}, month = {Aug 3}, pages = {42}, edition = {2004/08/05}, abstract = {BACKGROUND: Currently there is a lot of interest in the flexible framework offered by item banks for measuring patient relevant outcomes, including functional status. However, there are few item banks, which have been developed to quantify functional status, as expressed by the ability to perform activities of daily life. METHOD: This paper examines the psychometric properties of the AMC Linear Disability Score (ALDS) project item bank using an item response theory model and full information factor analysis. Data were collected from 555 respondents on a total of 160 items. RESULTS: Following the analysis, 79 items remained in the item bank. The remaining 81 items were excluded because of: difficulties in presentation (1 item); low levels of variation in response pattern (28 items); significant differences in measurement characteristics for males and females or for respondents under or over 85 years old (26 items); or lack of model fit to the data at item level (26 items). CONCLUSIONS: It is conceivable that the item bank will have different measurement characteristics for other patient or demographic populations. However, these results indicate that the ALDS item bank has sound psychometric properties for respondents in residential care settings and could form a stable base for measuring functional status in a range of situations, including the implementation of computerised adaptive testing of functional status.}, keywords = {*Disability Evaluation, *Health Status Indicators, Activities of Daily Living/*classification, Adult, Aged, Aged, 80 and over, Data Collection/methods, Female, Humans, Logistic Models, Male, Middle Aged, Netherlands, Pilot Projects, Probability, Psychometrics/*instrumentation, Questionnaires/standards, Residential Facilities/*utilization, Severity of Illness Index}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Holman, R. and Lindeboom, R. and Vermeulen, M. and de Haan, R. J.} } @book {1684, title = {The application of cognitive diagnosis and computerized adaptive testing to a large-scale assessment}, year = {2004}, address = {Unpublished doctoral dissertation, University of Texas at Austin}, author = {McGlohen, MK} } @article {202, title = {Assisted self-adapted testing: A comparative study}, journal = {European Journal of Psychological Assessment}, volume = {20}, number = {1}, year = {2004}, pages = {2-9}, abstract = {A new type of self-adapted test (S-AT), called Assisted Self-Adapted Test (AS-AT), is presented. It differs from an ordinary S-AT in that prior to selecting the difficulty category, the computer advises examinees on their best difficulty category choice, based on their previous performance. Three tests (computerized adaptive test, AS-AT, and S-AT) were compared regarding both their psychometric (precision and efficiency) and psychological (anxiety) characteristics. 
Tests were applied in an actual assessment situation, in which test scores determined 20\% of term grades. A sample of 173 high school students participated. Neither differences in posttest anxiety nor ability were obtained. Concerning precision, AS-AT was as precise as CAT, and both revealed more precision than S-AT. It was concluded that AS-AT acted as a CAT concerning precision. Some hints, but not conclusive support, of the psychological similarity between AS-AT and S-AT was also found. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Anxiety, Computer Assisted Testing, Psychometrics, Test}, author = {Hontangas, P. and Olea, J. and Ponsoda, V. and Revuelta, J. and Wise, S. L.} } @conference {856, title = {Automated Simultaneous Assembly of Multi-Stage Testing for the Uniform CPA Examination}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 201 KB}}, address = {San Diego CA}, author = {Breithaupt, K and Ariel, A. and Veldkamp, B.} } @conference {1058, title = {Combining computer adaptive testing technology with cognitively diagnostic assessment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 782 KB}}, address = {San Diego CA}, author = {McGlohen, MK and Chang, Hua-Hua and Wills, J. T.} } @article {8, title = {Computer adaptive testing: a strategy for monitoring stroke rehabilitation across settings}, journal = {Stroke Rehabilitation}, volume = {11}, number = {2}, year = {2004}, note = {Andres, Patricia LBlack-Schaffer, Randie MNi, PengshengHaley, Stephen MR01 hd43568/hd/nichdEvaluation StudiesResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesTopics in stroke rehabilitationTop Stroke Rehabil. 2004 Spring;11(2):33-9.}, month = {Spring}, pages = {33-39}, edition = {2004/05/01}, abstract = {Current functional assessment instruments in stroke rehabilitation are often setting-specific and lack precision, breadth, and/or feasibility. Computer adaptive testing (CAT) offers a promising potential solution by providing a quick, yet precise, measure of function that can be used across a broad range of patient abilities and in multiple settings. CAT technology yields a precise score by selecting very few relevant items from a large and diverse item pool based on each individual{\textquoteright}s responses. We demonstrate the potential usefulness of a CAT assessment model with a cross-sectional sample of persons with stroke from multiple rehabilitation settings.}, keywords = {*Computer Simulation, *User-Computer Interface, Adult, Aged, Aged, 80 and over, Cerebrovascular Accident/*rehabilitation, Disabled Persons/*classification, Female, Humans, Male, Middle Aged, Monitoring, Physiologic/methods, Severity of Illness Index, Task Performance and Analysis}, isbn = {1074-9357 (Print)}, author = {Andres, P. L. and Black-Schaffer, R. M. and Ni, P. and Haley, S. M.} } @conference {977, title = {Computer adaptive testing and the No Child Left Behind Act}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2004}, note = {{PDF file, 117 KB}}, address = {San Diego CA}, author = {Kingsbury, G. G. and Hauser, C.} } @inbook {1832, title = {Computer-adaptive testing}, year = {2004}, address = {B. Everett, and D. 
Howell (Eds.), Encyclopedia of statistics in behavioral science. New York: Wiley.}, author = {Luecht, RM} } @booklet {1393, title = {Computer-based test designs with optimal and non-optimal tests for making pass-fail decisions}, year = {2004}, address = {Research Report, University of Massachusetts, Amherst, MA}, author = {Hambleton, R. K. and Xing, D.} } @article {338, title = {A computerized adaptive knowledge test as an assessment tool in general practice: a pilot study}, journal = {Medical Teacher}, volume = {26}, number = {2}, year = {2004}, note = {0142-159xJournal Article}, month = {Mar}, pages = {178-83}, abstract = {Advantageous to assessment in many fields, CAT (computerized adaptive testing) use in general practice has been scarce. In adapting CAT to general practice, the basic assumptions of item response theory and the case specificity must be taken into account. In this context, this study first evaluated the feasibility of converting written extended matching tests into CAT. Second, it questioned the content validity of CAT. A stratified sample of students was invited to participate in the pilot study. The items used in this test, together with their parameters, originated from the written test. The detailed test paths of the students were retained and analysed thoroughly. Using the predefined pass-fail standard, one student failed the test. There was a positive correlation between the number of items and the candidate{\textquoteright}s ability level. The majority of students were presented with questions in seven of the 10 existing domains. Although proved to be a feasible test format, CAT cannot substitute for the existing high-stakes large-scale written test. It may provide a reliable instrument for identifying candidates who are at risk of failing in the written test.}, keywords = {*Computer Systems, Algorithms, Educational Measurement/*methods, Family Practice/*education, Humans, Pilot Projects}, author = {Roex, A. and Degryse, J.} } @article {147, title = {Computerized adaptive measurement of depression: A simulation study}, journal = {BMC Psychiatry}, volume = {4}, number = {1}, year = {2004}, pages = {13-23}, abstract = {Background: Efficient, accurate instruments for measuring depression are increasingly important in clinical practice. We developed a computerized adaptive version of the Beck Depression Inventory (BDI). We examined its efficiency and its usefulness in identifying Major Depressive Episodes (MDE) and in measuring depression severity. Methods: Subjects were 744 participants in research studies in which each subject completed both the BDI and the SCID. In addition, 285 patients completed the Hamilton Depression Rating Scale. Results: The adaptive BDI had an AUC as an indicator of a SCID diagnosis of MDE of 88\%, equivalent to the full BDI. The adaptive BDI asked fewer questions than the full BDI (5.6 versus 21 items). 
The adaptive latent depression score correlated r = .92 with the BDI total score and the latent depression score correlated more highly with the Hamilton (r = .74) than the BDI total score did (r = .70). Conclusions: Adaptive testing for depression may provide greatly increased efficiency without loss of accuracy in identifying MDE or in measuring depression severity.}, keywords = {*Computer Simulation, Adult, Algorithms, Area Under Curve, Comparative Study, Depressive Disorder/*diagnosis/epidemiology/psychology, Diagnosis, Computer-Assisted/*methods/statistics \& numerical data, Factor Analysis, Statistical, Female, Humans, Internet, Male, Mass Screening/methods, Patient Selection, Personality Inventory/*statistics \& numerical data, Pilot Projects, Prevalence, Psychiatric Status Rating Scales/*statistics \& numerical data, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Severity of Illness Index, Software}, author = {Gardner, W. and Shear, K. and Kelleher, K. J. and Pajer, K. A. and Mammen, O. and Buysse, D. and Frank, E.} } @inbook {1887, title = {Computerized adaptive testing}, year = {2004}, note = {{PDF file, 180 KB}}, address = {Encyclopedia of social measurement. Academic Press.}, author = {Segall, D. O.} } @inbook {1737, title = {Computerized adaptive testing and item banking}, year = {2004}, note = {{PDF file 371 KB}}, address = {P. M. Fayers and R. D. Hays (Eds.) Assessing Quality of Life. Oxford: Oxford University Press.}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E and Jr.} } @article {2177, title = {Computerized Adaptive Testing for Effective and Efficient Measurement in Counseling and Education}, journal = {Measurement and Evaluation in Counseling and Development}, volume = {37}, number = {2}, year = {2004}, pages = {70-84}, author = {Weiss, D. J.} } @article {2114, title = {Computerized Adaptive Testing With Multiple-Form Structures}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {147-164}, abstract = {

A multiple-form structure (MFS) is an ordered collection or network of testlets (i.e., sets of items). An examinee{\textquoteright}s progression through the network of testlets is dictated by the correctness of an examinee{\textquoteright}s answers, thereby adapting the test to his or her trait level. The collection of paths through the network yields the set of all possible test forms, allowing test specialists the opportunity to review them before they are administered. Also, limiting the exposure of an individual MFS to a specific period of time can enhance test security. This article provides an overview of methods that have been developed to generate parallel MFSs. The approach is applied to the assembly of an experimental computerized Law School Admission Test (LSAT).

}, doi = {10.1177/0146621604263652}, url = {http://apm.sagepub.com/content/28/3/147.abstract}, author = {Armstrong, Ronald D. and Jones, Douglas H. and Koppel, Nicole B. and Pashley, Peter J.} } @article {11, title = {Computerized adaptive testing with multiple-form structures}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {147-164}, publisher = {Sage Publications: US}, abstract = {A multiple-form structure (MFS) is an ordered collection or network of testlets (i.e., sets of items). An examinee{\textquoteright}s progression through the network of testlets is dictated by the correctness of an examinee{\textquoteright}s answers, thereby adapting the test to his or her trait level. The collection of paths through the network yields the set of all possible test forms, allowing test specialists the opportunity to review them before they are administered. Also, limiting the exposure of an individual MFS to a specific period of time can enhance test security. This article provides an overview of methods that have been developed to generate parallel MFSs. The approach is applied to the assembly of an experimental computerized Law School Admission Test (LSAT). (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, Law School Admission Test, multiple-form structure, testlets}, isbn = {0146-6216 (Print)}, author = {Armstrong, R. D. and Jones, D. H. and Koppel, N. B. and Pashley, P. J.} } @article {44, title = {Computers in clinical assessment: Historical developments, present status, and future challenges}, journal = {Journal of Clinical Psychology}, volume = {60}, number = {3}, year = {2004}, pages = {331-345}, publisher = {John Wiley \& Sons: US}, abstract = {Computerized testing methods have long been regarded as a potentially powerful asset for providing psychological assessment services. Ever since computers were first introduced and adapted to the field of assessment psychology in the 1950s, they have been a valuable aid for scoring, data processing, and even interpretation of test results. The history and status of computer-based personality and neuropsychological tests are discussed in this article. Several pertinent issues involved in providing test interpretation by computer are highlighted. Advances in computer-based test use, such as computerized adaptive testing, are described and problems noted. Today, there is great interest in expanding the availability of psychological assessment applications on the Internet. Although these applications show great promise, there are a number of problems associated with providing psychological tests on the Internet that need to be addressed by psychologists before the Internet can become a major medium for psychological service delivery. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {clinical assessment, computerized testing method, Internet, psychological assessment services}, isbn = {0021-9762 (Print); 1097-4679 (Electronic)}, author = {Butcher, J. N. and Perry, J. L. and Hahn, J. A.} } @article {2237, title = {Constraining Item Exposure in Computerized Adaptive Testing With Shadow Tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {3}, year = {2004}, pages = {273-291}, abstract = {

Item-exposure control in computerized adaptive testing is implemented by imposing item-ineligibility constraints on the assembly process of the shadow tests. The method resembles Sympson and Hetter{\textquoteright}s (1985) method of item-exposure control in that the decisions to impose the constraints are probabilistic. The method does not, however, require time-consuming simulation studies to set values for control parameters before the operational use of the test. Instead, it can set the probabilities of item ineligibility adaptively during the test using the actual item-exposure rates. An empirical study using an item pool from the Law School Admission Test showed that application of the method yielded perfect control of the item-exposure rates and had negligible impact on the bias and mean-squared error functions of the ability estimator.

}, doi = {10.3102/10769986029003273}, url = {http://jeb.sagepub.com/cgi/content/abstract/29/3/273}, author = {van der Linden, Wim J. and Veldkamp, Bernard P.} } @article {408, title = {Constraining item exposure in computerized adaptive testing with shadow tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {3}, year = {2004}, pages = {273-291}, publisher = {American Educational Research Assn: US}, abstract = {Item-exposure control in computerized adaptive testing is implemented by imposing item-ineligibility constraints on the assembly process of the shadow tests. The method resembles Sympson and Hetter{\textquoteright}s (1985) method of item-exposure control in that the decisions to impose the constraints are probabilistic. The method does not, however, require time-consuming simulation studies to set values for control parameters before the operational use of the test. Instead, it can set the probabilities of item ineligibility adaptively during the test using the actual item-exposure rates. An empirical study using an item pool from the Law School Admission Test showed that application of the method yielded perfect control of the item-exposure rates and had negligible impact on the bias and mean-squared error functions of the ability estimator. }, keywords = {computerized adaptive testing, item exposure control, item ineligibility constraints, Probability, shadow tests}, isbn = {1076-9986 (Print)}, author = {van der Linden, W. J. and Veldkamp, B. P.} } @article {10, title = {Constructing rotating item pools for constrained adaptive testing}, journal = {Journal of Educational Measurement}, volume = {41}, number = {4}, year = {2004}, pages = {345-359}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {Preventing items in adaptive testing from being over- or underexposed is one of the main problems in computerized adaptive testing. Though the problem of overexposed items can be solved using a probabilistic item-exposure control method, such methods are unable to deal with the problem of underexposed items. Using a system of rotating item pools, on the other hand, is a method that potentially solves both problems. In this method, a master pool is divided into (possibly overlapping) smaller item pools, which are required to have similar distributions of content and statistical attributes. These pools are rotated among the testing sites to realize desirable exposure rates for the items. A test assembly model, motivated by Gulliksen{\textquoteright}s matched random subtests method, was explored to help solve the problem of dividing a master pool into a set of smaller pools. Different methods to solve the model are proposed. An item pool from the Law School Admission Test was used to evaluate the performances of computerized adaptive tests from systems of rotating item pools constructed using these methods. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive tests, constrained adaptive testing, item exposure, rotating item pools}, isbn = {0022-0655 (Print)}, author = {Ariel, A. and Veldkamp, B. P. and van der Linden, W. J.} } @conference {1014, title = {The context effects of multidimensional CAT on the accuracy of multidimensional abilities and the item exposure rates}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2004}, note = {{Incomplete PDF file, 202 KB}}, address = {San Diego CA}, author = {Li, Y. H. and Schafer, W. 
D.} } @book {1667, title = {Contributions to the theory and practice of computerized adaptive testing}, year = {2004}, address = {Arnhem, The Netherlands: Citogroep}, author = {Theo Eggen} } @conference {941, title = {Detecting exposed test items in computer-based testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 1.245 MB}}, address = {San Diego CA}, author = {Han, N. and Hambleton, R. K.} } @conference {868, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement: Exploring the Current State and the Future of Item Response Theory}, year = {2004}, note = {{PDF file, 181 KB}}, address = {Item Banks, and Computer-Adaptive Testing,{\textquotedblright} Bethesda MD}, author = {Chang, C-H.} } @conference {851, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement: Exploring the Current State and the Future of Item Response Theory}, year = {2004}, note = {{PDF file, 406 KB}}, address = {Item Banks, and Computer-Adaptive Testing,{\textquotedblright} Bethesda MD}, author = {Bjorner, J. B.} } @article {254, title = {The development and evaluation of a software prototype for computer-adaptive testing}, journal = {Computers and Education}, volume = {43}, number = {1-2}, year = {2004}, pages = {109-123}, keywords = {computerized adaptive testing}, author = {Lilley, M and Barker, T and Britton, C} } @article {69, title = {Effects of practical constraints on item selection rules at the early stages of computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {41}, number = {2}, year = {2004}, pages = {149-174}, publisher = {Blackwell Publishing: United Kingdom}, abstract = {The purpose of this study was to compare the effects of four item selection rules--(1) Fisher information (F), (2) Fisher information with a posterior distribution (FP), (3) Kullback-Leibler information with a posterior distribution (KP), and (4) completely randomized item selection (RN)--with respect to the precision of trait estimation and the extent of item usage at the early stages of computerized adaptive testing. The comparison of the four item selection rules was carried out under three conditions: (1) using only the item information function as the item selection criterion; (2) using both the item information function and content balancing; and (3) using the item information function, content balancing, and item exposure control. When test length was less than 10 items, FP and KP tended to outperform F at extreme trait levels in Condition 1. However, in more realistic settings, it could not be concluded that FP and KP outperformed F, especially when item exposure control was imposed. When test length was greater than 10 items, the three nonrandom item selection procedures performed similarly no matter what the condition was, while F had slightly higher item usage. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, item selection rules, practical constraints}, isbn = {0022-0655 (Print)}, author = {Chen, S-Y. and Ankenmann, R. 
D.} } @article {332, title = {Estimating ability and item-selection strategy in self-adapted testing: A latent class approach}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {4}, year = {2004}, pages = {379-396}, publisher = {American Educational Research Assn: US}, abstract = {This article presents a psychometric model for estimating ability and item-selection strategies in self-adapted testing. In contrast to computer adaptive testing, in self-adapted testing the examinees are allowed to select the difficulty of the items. The item-selection strategy is defined as the distribution of difficulty conditional on the responses given to previous items. The article shows that missing responses in self-adapted testing are missing at random and can be ignored in the estimation of ability. However, the item-selection strategy cannot always be ignored in such an estimation. An EM algorithm is presented to estimate an examinee{\textquoteright}s ability and strategies, and a model fit is evaluated using Akaike{\textquoteright}s information criterion. The article includes an application with real data to illustrate how the model can be used in practice for evaluating hypotheses, estimating ability, and identifying strategies. In the example, four strategies were identified and related to examinees{\textquoteright} ability. It was shown that individual examinees tended not to follow a consistent strategy throughout the test. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {estimating ability, item-selection strategies, psychometric model, self-adapted testing}, isbn = {1076-9986 (Print)}, author = {Revuelta, J.} } @article {2086, title = {Evaluating scale stability of a computer adaptive testing system}, number = {05-12}, year = {2004}, institution = {GMAC}, address = {McLean, VA}, author = {Guo, F. and Wang, L.} } @book {1725, title = {Evaluating the effects of several multi-stage testing design variables on selected psychometric outcomes for certification and licensure assessment}, year = {2004}, address = {Unpublished doctoral dissertation, University of Massachusetts, Amherst}, author = {Zenisky, A. L.} } @article {245, title = {{\'E}valuation et multim{\'e}dia dans l{\textquoteright}apprentissage d{\textquoteright}une L2 [Assessment and multimedia in learning an L2]}, journal = {ReCALL}, volume = {16}, number = {2}, year = {2004}, pages = {475-487}, abstract = {In the first part of this paper different areas where technology may be used for second language assessment are described. First, item banking operations, which are generally based on item Response Theory but not necessarily restricted to dichotomously scored items, facilitate assessment task organization and require technological support. Second, technology may help to design more authentic assessment tasks or may be needed in some direct testing situations. Third, the assessment environment may be more adapted and more stimulating when technology is used to give the student more control. The second part of the paper presents different functions of assessment. The monitoring function (often called formative assessment) aims at adapting the classroom activities to students and to provide continuous feedback. Technology may be used to train the teachers in monitoring techniques, to organize data or to produce diagnostic information; electronic portfolios or quizzes that are built in some educational software may also be used for monitoring. 
The placement function is probably the one in which the application of computer adaptive testing procedures (e.g. French CAPT) is the most appropriate. Automatic scoring devices may also be used for placement purposes. Finally the certification function requires more valid and more reliable tools. Technology may be used to enhance the testing situation (to make it more authentic) or to facilitate data processing during the construction of a test. Almond et al. (2002) propose a four component model (Selection, Presentation, Scoring and Response) for designing assessment systems. Each component must be planned taking into account the assessment function. }, keywords = {Adaptive Testing, Computer Assisted Instruction, Educational, Foreign Language Learning, Program Evaluation, Technology computerized adaptive testing}, author = {Laurier, M.} } @article {291, title = {Evaluation of the CATSIB DIF procedure in a pretest setting}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {2}, year = {2004}, pages = {177-199}, publisher = {American Educational Research Assn: US}, abstract = {A new procedure, CATSIB, for assessing differential item functioning (DIF) on computerized adaptive tests (CATs) is proposed. CATSIB, a modified SIBTEST procedure, matches test takers on estimated ability and controls for impact-induced Type I error inflation by employing a CAT version of the SIBTEST "regression correction." The performance of CATSIB in terms of detection of DIF in pretest items was evaluated in a simulation study. Simulated test takers were adoptively administered 25 operational items from a pool of 1,000 and were linearly administered 16 pretest items that were evaluated for DIF. Sample size varied from 250 to 500 in each group. Simulated impact levels ranged from a 0- to 1-standard-deviation difference in mean ability levels. The results showed that CATSIB with the regression correction displayed good control over Type 1 error, whereas CATSIB without the regression correction displayed impact-induced Type 1 error inflation. With 500 test takers in each group, power rates were exceptionally high (84\% to 99\%) for values of DIF at the boundary between moderate and large DIF. For smaller samples of 250 test takers in each group, the corresponding power rates ranged from 47\% to 95\%. In addition, in all cases, CATSIB was very accurate in estimating the true values of DIF, displaying at most only minor estimation bias. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive tests, differential item functioning}, isbn = {1076-9986 (Print)}, author = {Nandakumar, R. and Roussos, L. A.} } @article {506, title = {ffects of practical constraints on item selection rules at the early stages of computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {41}, year = {2004}, pages = {149-174}, author = {Chen, Y.-Y. and Ankenmann, R. D.} } @article {2171, title = {Impact of Test Design, Item Quality, and Item Bank Size on the Psychometric Properties of Computer-Based Credentialing Examinations}, journal = {Educational and Psychological Measurement}, volume = {64}, number = {1}, year = {2004}, pages = {5-21}, abstract = {

Computer-based testing by credentialing agencies has become common; however, selecting a test design is difficult because several good ones are available{\textemdash}parallel forms, computer adaptive (CAT), and multistage (MST). In this study, three computer-based test designs under some common examination conditions were investigated. Item bank size and item quality had a practically significant impact on decision consistency and accuracy. Even in nearly ideal situations, the choice of test design was not a factor in the results. Two conclusions follow from the findings: (a) More time and resources should be committed to expanding the size and quality of item banks, and (b) designs that individualize an exam administration such as MST and CAT may not be helpful when the primary purpose of the examination is to make pass-fail decisions and conditions are present for using parallel forms with a target information function that can be centered on the passing score.

}, doi = {10.1177/0013164403258393}, url = {http://epm.sagepub.com/content/64/1/5.abstract}, author = {Xing, Dehui and Hambleton, Ronald K.} } @article {2136, title = {Implementation and Measurement Efficiency of Multidimensional Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {28}, number = {5}, year = {2004}, pages = {295-316}, abstract = {

Multidimensional adaptive testing (MAT) procedures are proposed for the measurement of several latent traits by a single examination. Bayesian latent trait estimation and adaptive item selection are derived. Simulations were conducted to compare the measurement efficiency of MAT with those of unidimensional adaptive testing and random administration. The results showed that the higher the correlation between latent traits, the more latent traits there were, and the more scoring levels there were in the items, the more efficient MAT was than the other two procedures. For tests containing multidimensional items, only MAT is applicable, whereas unidimensional adaptive testing is not. Issues in implementing MAT are discussed.

}, doi = {10.1177/0146621604265938}, url = {http://apm.sagepub.com/content/28/5/295.abstract}, author = {Wang, Wen-Chung and Chen, Po-Hsi} } @conference {1290, title = {Investigating the effects of selected multi-stage test design alternatives on credentialing outcomes}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 129 KB}}, address = {San Diego CA}, author = {Zenisky, A. L. and Hambleton, R. K.} } @proceedings {214, title = {An investigation of two combination procedures of SPRT for three-category classification decisions in computerized classification test}, journal = {annual meeting of the American Educational Research Association}, year = {2004}, note = {annual meeting of the American Educational Research Association, San Antonio}, month = {04/2004}, address = {San Antonio, Texas}, keywords = {computerized adaptive testing, Computerized classification testing, sequential probability ratio testing}, author = {Jiao, H. and Wang, S and Lau, CA} } @booklet {1414, title = {An investigation of two combination procedures of SPRT for three-category decisions in computerized classification test}, year = {2004}, note = {{PDF file, 649 KB}}, address = {Paper presented at the annual meeting of the American Educational Research Association, San Diego CA}, author = {Jiao, H. and Wang, S and Lau, A} } @conference {906, title = {Item parameter recovery with adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {$\#$DO04-01 {PDF file, 379 KB}}, address = {San Diego CA}, author = {Do, B.-R. and Chuah, S. C. and F Drasgow} } @article {139, title = {Kann die Konfundierung von Konzentrationsleistung und Aktivierung durch adaptives Testen mit dern FAKT vermieden werden? [Avoiding the confounding of concentration performance and activation by adaptive testing with the FACT]}, journal = {Zeitschrift f{\"u}r Differentielle und Diagnostische Psychologie}, volume = {25}, number = {1}, year = {2004}, pages = {1-17}, abstract = {The study investigates the effect of computerized adaptive testing strategies on the confounding of concentration performance with activation. A sample of 54 participants was administered 1 out of 3 versions (2 adaptive, 1 non-adaptive) of the computerized Frankfurt Adaptive Concentration Test FACT (Moosbrugger \& Heyden, 1997) at three subsequent points in time. During the test administration changes in activation (electrodermal activity) were recorded. The results pinpoint a confounding of concentration performance with activation for the non-adaptive test version, but not for the adaptive test versions (p = .01). Thus, adaptive FACT testing strategies can remove the confounding of concentration performance with activation, thereby increasing the discriminant validity. In conclusion, an attention-focusing-hypothesis is formulated to explain the observed effect. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Concentration, Performance, Testing computerized adaptive testing}, author = {Frey, A. and Moosbrugger, H.} } @conference {2251, title = {A learning environment for english for academic purposes based on adaptive tests and task-based systems}, booktitle = {Intelligent Tutoring Systems. }, year = {2004}, publisher = { Springer Berlin Heidelberg}, organization = { Springer Berlin Heidelberg}, author = {PITON-GON{\c C}ALVES, J. 
and ALUISIO, S. M. and MENDONCA, L. H. and NOVAES, O. O.} } @inbook {2201, title = {A Learning Environment for English for Academic Purposes Based on Adaptive Tests and Task-Based Systems}, booktitle = {Intelligent Tutoring Systems}, series = {Lecture Notes in Computer Science}, volume = {3220}, year = {2004}, pages = {1-11}, publisher = {Springer Berlin / Heidelberg}, organization = {Springer Berlin / Heidelberg}, isbn = {978-3-540-22948-3}, doi = {10.1007/978-3-540-30139-4_1}, url = {http://dx.doi.org/10.1007/978-3-540-30139-4_1}, author = {Gon{\c c}alves, Jean P. and Aluisio, Sandra M. and de Oliveira, Leandro H.M. and Oliveira Jr., Osvaldo N.}, editor = {Lester, James C. and Vicari, Rosa Maria and Paragua{\c c}u, F{\'a}bio} } @article {2137, title = {Mokken Scale Analysis Using Hierarchical Clustering Procedures}, journal = {Applied Psychological Measurement}, volume = {28}, number = {5}, year = {2004}, pages = {332-354}, abstract = {

Mokken scale analysis (MSA) can be used to assess and build unidimensional scales from an item pool that is sensitive to multiple dimensions. These scales satisfy a set of scaling conditions, one of which follows from the model of monotone homogeneity. An important drawback of the MSA program is that the sequential item selection and scale construction procedure may not find the dominant underlying dimensionality of the responses to a set of items. The authors investigated alternative hierarchical item selection procedures and compared the performance of four hierarchical methods and the sequential clustering method in the MSA context. The results showed that hierarchical clustering methods can improve the search process of the dominant dimensionality of a data matrix. In particular, the complete linkage and scale linkage methods were promising in finding the dimensionality of the item response data from a set of items.

}, doi = {10.1177/0146621604265510}, url = {http://apm.sagepub.com/content/28/5/332.abstract}, author = {van Abswoude, Alexandra A. H. and Vermunt, Jeroen K. and Hemker, Bas T. and van der Ark, L. Andries} } @conference {1257, title = {Mutual information item selection in multiple-category classification CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {$\#$WE04-02}, address = {San Diego CA}, author = {Weissman, A.} } @conference {1234, title = {New methods for CBT item pool evaluation}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2004}, note = {$\#$WA04-02 {PDF file, 1.005 MB}}, address = {San Diego CA}, author = {Wang, L.} } @booklet {1370, title = {Optimal testing with easy items in computerized adaptive testing (Measurement and Research Department Report 2004-2)}, year = {2004}, address = {Arnhem, The Netherlands: Cito Group}, author = {Theo Eggen and Verschoor, A. J.} } @booklet {200, title = {Practical methods for dealing with {\textquoteright}not applicable{\textquoteright} item responses in the AMC Linear Disability Score project}, journal = {Health and Quality of Life Outcomes}, volume = {2}, year = {2004}, note = {Holman, RebeccaGlas, Cees A WLindeboom, RobertZwinderman, Aeilko Hde Haan, Rob JEnglandHealth Qual Life Outcomes. 2004 Jun 16;2:29.}, month = {Jun 16}, pages = {29}, type = {Comparative StudyResearch Support, Non-U.S. Gov{\textquoteright}t}, edition = {2004/06/18}, abstract = {BACKGROUND: Whenever questionnaires are used to collect data on constructs, such as functional status or health related quality of life, it is unlikely that all respondents will respond to all items. This paper examines ways of dealing with responses in a {\textquoteright}not applicable{\textquoteright} category to items included in the AMC Linear Disability Score (ALDS) project item bank. METHODS: The data examined in this paper come from the responses of 392 respondents to 32 items and form part of the calibration sample for the ALDS item bank. The data are analysed using the one-parameter logistic item response theory model. The four practical strategies for dealing with this type of response are: cold deck imputation; hot deck imputation; treating the missing responses as if these items had never been offered to those individual patients; and using a model which takes account of the {\textquoteright}tendency to respond to items{\textquoteright}. RESULTS: The item and respondent population parameter estimates were very similar for the strategies involving hot deck imputation; treating the missing responses as if these items had never been offered to those individual patients; and using a model which takes account of the {\textquoteright}tendency to respond to items{\textquoteright}. The estimates obtained using the cold deck imputation method were substantially different. CONCLUSIONS: The cold deck imputation method was not considered suitable for use in the ALDS item bank. The other three methods described can be usefully implemented in the ALDS item bank, depending on the purpose of the data analysis to be carried out. 
These three methods may be useful for other data sets examining similar constructs, when item response theory based methods are used.}, keywords = {*Disability Evaluation, *Health Surveys, *Logistic Models, *Questionnaires, Activities of Daily Living/*classification, Data Interpretation, Statistical, Health Status, Humans, Pilot Projects, Probability, Quality of Life, Severity of Illness Index}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Holman, R. and Glas, C. A. and Lindeboom, R. and Zwinderman, A. H. and de Haan, R. J.} } @article {381, title = {Pre-equating: a simulation study based on a large scale assessment model}, journal = {Journal of Applied Measurement}, volume = {5}, number = {3}, year = {2004}, note = {1529-7713Journal Article}, pages = {301-18}, abstract = {Although post-equating (PE) has proven to be an acceptable method in the scaling and equating of items and forms, there are times when the turn-around period for equating and converting raw scores to scale scores is so small that PE cannot be undertaken within the prescribed time frame. In such cases, pre-equating (PrE) could be considered as an acceptable alternative. Assessing the feasibility of using item calibrations from the item bank (as in PrE) is conditioned on the equivalency of the calibrations and the errors associated with it vis a vis the results obtained via PE. This paper creates item banks over three periods of item introduction into the banks and uses the Rasch model in examining data with respect to the recovery of item parameters, the measurement error, and the effect cut-points have on examinee placement in both the PrE and PE situations. Results indicate that PrE is a viable solution to PE provided the stability of the item calibrations are enhanced by using large sample sizes (perhaps as large as full-population) in populating the item bank.}, keywords = {*Databases, *Models, Theoretical, Calibration, Human, Psychometrics, Reference Values, Reproducibility of Results}, author = {Taherbhai, H. M. and Young, M. J.} } @conference {885, title = {Protecting the integrity of computer-adaptive licensure tests: Results of a legal challenge}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2004}, note = {{PDF file, 191 KB}}, address = {San Diego CA}, author = {Cizek, G. J.} } @article {87, title = {Refining the conceptual basis for rehabilitation outcome measurement: personal care and instrumental activities domain}, journal = {Medical Care}, volume = {42}, number = {1 Suppl}, year = {2004}, note = {0025-7079Journal Article}, month = {Jan}, pages = {I62-172}, abstract = {BACKGROUND: Rehabilitation outcome measures routinely include content on performance of daily activities; however, the conceptual basis for item selection is rarely specified. These instruments differ significantly in format, number, and specificity of daily activity items and in the measurement dimensions and type of scale used to specify levels of performance. We propose that a requirement for upper limb and hand skills underlies many activities of daily living (ADL) and instrumental activities of daily living (IADL) items in current instruments, and that items selected based on this definition can be placed along a single functional continuum. 
OBJECTIVE: To examine the dimensional structure and content coverage of a Personal Care and Instrumental Activities item set and to examine the comparability of items from existing instruments and a set of new items as measures of this domain. METHODS: Participants (N = 477) from 3 different disability groups and 4 settings representing the continuum of postacute rehabilitation care were administered the newly developed Activity Measure for Post-Acute Care (AM-PAC), the SF-8, and an additional setting-specific measure: FIM (in-patient rehabilitation); MDS (skilled nursing facility); MDS-PAC (postacute settings); OASIS (home care); or PF-10 (outpatient clinic). Rasch (partial-credit model) analyses were conducted on a set of 62 items covering the Personal Care and Instrumental domain to examine item fit, item functioning, and category difficulty estimates and unidimensionality. RESULTS: After removing 6 misfitting items, the remaining 56 items fit acceptably along the hypothesized continuum. Analyses yielded different difficulty estimates for the maximum score (eg, "Independent performance") for items with comparable content from different instruments. Items showed little differential item functioning across age, diagnosis, or severity groups, and 92\% of the participants fit the model. CONCLUSIONS: ADL and IADL items from existing rehabilitation outcomes instruments that depend on skilled upper limb and hand use can be located along a single continuum, along with the new personal care and instrumental items of the AM-PAC addressing gaps in content. Results support the validity of the proposed definition of the Personal Care and Instrumental Activities dimension of function as a guide for future development of rehabilitation outcome instruments, such as linked, setting-specific short forms and computerized adaptive testing approaches.}, keywords = {*Self Efficacy, *Sickness Impact Profile, Activities of Daily Living/*classification/psychology, Adult, Aged, Aged, 80 and over, Disability Evaluation, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods/statistics \& numerical data, Questionnaires/*standards, Recovery of Function/physiology, Rehabilitation/*standards/statistics \& numerical data, Reproducibility of Results, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sensitivity and Specificity}, author = {Coster, W. J. and Haley, S. M. and Andres, P. L. and Ludlow, L. H. and Bond, T. L. and Ni, P. S.} } @article {167, title = {Score comparability of short forms and computerized adaptive testing: Simulation study with the activity measure for post-acute care}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {85}, number = {4}, year = {2004}, note = {Haley, Stephen MCoster, Wendy JAndres, Patricia LKosinski, MarkNi, PengshengR01 hd43568/hd/nichdComparative StudyMulticenter StudyResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2004 Apr;85(4):661-6.}, month = {Apr}, pages = {661-6}, edition = {2004/04/15}, abstract = {OBJECTIVE: To compare simulated short-form and computerized adaptive testing (CAT) scores to scores obtained from complete item sets for each of the 3 domains of the Activity Measure for Post-Acute Care (AM-PAC). DESIGN: Prospective study. 
SETTING: Six postacute health care networks in the greater Boston metropolitan area, including inpatient acute rehabilitation, transitional care units, home care, and outpatient services. PARTICIPANTS: A convenience sample of 485 adult volunteers who were receiving skilled rehabilitation services. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Inpatient and community-based short forms and CAT applications were developed for each of 3 activity domains (physical \& mobility, personal care \& instrumental, applied cognition) using item pools constructed from new items and items from existing postacute care instruments. RESULTS: Simulated CAT scores correlated highly with score estimates from the total item pool in each domain (4- and 6-item CAT r range,.90-.95; 10-item CAT r range,.96-.98). Scores on the 10-item short forms constructed for inpatient and community settings also provided good estimates of the AM-PAC item pool scores for the physical \& movement and personal care \& instrumental domains, but were less consistent in the applied cognition domain. Confidence intervals around individual scores were greater in the short forms than for the CATs. CONCLUSIONS: Accurate scoring estimates for AM-PAC domains can be obtained with either the setting-specific short forms or the CATs. The strong relationship between CAT and item pool scores can be attributed to the CAT{\textquoteright}s ability to select specific items to match individual responses. The CAT may have additional advantages over short forms in practicality, efficiency, and the potential for providing more precise scoring estimates for individuals.}, keywords = {Boston, Factor Analysis, Statistical, Humans, Outcome Assessment (Health Care)/*methods, Prospective Studies, Questionnaires/standards, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {0003-9993 (Print)}, author = {Haley, S. M. and Coster, W. J. and Andres, P. L. and Kosinski, M. and Ni, P.} } @conference {1211, title = {A sequential Bayesian procedure for item calibration in multistage testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, address = {San Diego CA}, author = {van der Linden, W. J. and Alan D Mead} } @article {61, title = {Sequential estimation in variable length computerized adaptive testing}, journal = {Journal of Statistical Planning and Inference}, volume = {121}, number = {2}, year = {2004}, pages = {249-264}, abstract = {With the advent of modern computer technology, there have been growing e3orts in recent years to computerize standardized tests, including the popular Graduate Record Examination (GRE), the Graduate Management Admission Test (GMAT) and the Test of English as a Foreign Language (TOEFL). Many of such computer-based tests are known as the computerized adaptive tests, a major feature of which is that, depending on their performance in the course of testing, di3erent examinees may be given with di3erent sets of items (questions). In doing so, items can be e>ciently utilized to yield maximum accuracy for estimation of examinees{\textquoteright} ability traits. We consider, in this article, one type of such tests where test lengths vary with examinees to yield approximately same predetermined accuracy for all ability traits. A comprehensive large sample theory is developed for the expected test length and the sequential point and interval estimates of the latent trait. 
Extensive simulations are conducted with results showing that the large sample approximations are adequate for realistic sample sizes. }, isbn = {03783758}, author = {Chang, I. Y.} } @article {355, title = {A sharing item response theory model for computerized adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {29}, number = {4}, year = {2004}, note = {References .American Educational Research Assn, US}, month = {Win}, pages = {439-460}, abstract = {A new sharing item response theory (SIRT) model is presented which explicitly models the effects of sharing item content between informants and testtakers. This model is used to construct adaptive item selection and scoring rules that provide increased precision and reduced score gains in instances where sharing occurs. The adaptive item selection rules are expressed as functions of the item{\textquoteright}s exposure rate in addition to other commonly used properties (characterized by difficulty, discrimination, and guessing parameters). Based on the results of simulated item responses, the new item selection and scoring algorithms compare favorably to the Sympson-Hetter exposure control method. The new SIRT approach provides higher reliability and lower score gains in instances where sharing occurs.}, author = {Segall, D. O.} } @article {82, title = {Siette: a web-based tool for adaptive testing}, journal = {International Journal of Artificial Intelligence in Education}, volume = {14}, number = {1}, year = {2004}, pages = {29-61}, keywords = {computerized adaptive testing}, author = {Conejo, R and Guzm{\'a}n, E and Mill{\'a}n, E and Trella, M and P{\'e}rez-De-La-Cruz, JL and R{\'\i}os, A} } @inbook {1771, title = {State-of-the-art and adaptive open-closed items in adaptive foreign language assessment}, year = {2004}, address = {Proceedings 4th Hellenic Conference with ternational Participation: Informational and Communication Technologies in Education, Athens,747-756}, author = {Giouroglou, H. and Economides, A. A.} } @article {624, title = {Statistics for detecting disclosed items in a CAT environment}, journal = {Metodologia de Las Ciencias del Comportamiento.}, volume = {5}, year = {2004}, chapter = {p{\'a}gs. 225-242}, issn = {1575-9105}, author = {Lu, Y., and Hambleton, R. K.} } @article {2115, title = {Strategies for Controlling Item Exposure in Computerized Adaptive Testing With the Generalized Partial Credit Model}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {165-185}, abstract = {

Choosing a strategy for controlling item exposure has become an integral part of test development for computerized adaptive testing (CAT). This study investigated the performance of six procedures for controlling item exposure in a series of simulated CATs under the generalized partial credit model. In addition to a no-exposure control baseline condition, the randomesque, modified-within-.10-logits, Sympson-Hetter, conditional Sympson-Hetter, a-stratified with multiple-stratification, and enhanced a-stratified with multiple-stratification procedures were implemented to control exposure rates. Two variations of the randomesque and modified-within-.10-logits procedures were examined, which varied the size of the item group from which the next item to be administered was randomly selected. The results indicate that although the conditional Sympson-Hetter provides somewhat lower maximum exposure rates, the randomesque and modified-within-.10-logits procedures with the six-item group variation have great utility for controlling overlap rates and increasing pool utilization and should be given further consideration.

}, doi = {10.1177/0146621604264133}, url = {http://apm.sagepub.com/content/28/3/165.abstract}, author = {Davis, Laurie Laughlin} } @article {93, title = {Strategies for controlling item exposure in computerized adaptive testing with the generalized partial credit model}, journal = {Applied Psychological Measurement}, volume = {28}, number = {3}, year = {2004}, pages = {165-185}, publisher = {Sage Publications: US}, abstract = {Choosing a strategy for controlling item exposure has become an integral part of test development for computerized adaptive testing (CAT). This study investigated the performance of six procedures for controlling item exposure in a series of simulated CATs under the generalized partial credit model. In addition to a no-exposure control baseline condition, the randomesque, modified-within-.10-logits, Sympson-Hetter, conditional Sympson-Hetter, a-stratified with multiple-stratification, and enhanced a-stratified with multiple-stratification procedures were implemented to control exposure rates. Two variations of the randomesque and modified-within-.10-logits procedures were examined, which varied the size of the item group from which the next item to be administered was randomly selected. The results indicate that although the conditional Sympson-Hetter provides somewhat lower maximum exposure rates, the randomesque and modified-within-.10-logits procedures with the six-item group variation have great utility for controlling overlap rates and increasing pool utilization and should be given further consideration. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {computerized adaptive testing, generalized partial credit model, item exposure}, isbn = {0146-6216 (Print)}, author = {Davis, L. L.} } @article {38, title = {Strategies for controlling testlet exposure rates in computerized adaptive testing systems}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {64}, number = {11-B}, year = {2004}, pages = {5835}, abstract = {Exposure control procedures in computerized adaptive testing (CAT) systems protect item pools from being compromised, however, this impacts measurement precision. Previous research indicates that exposure control procedures perform differently for dichotomously scored versus polytomously scored CAT systems. For dichotomously scored CATs, conditional selection procedures are often the optimal choice, while randomization procedures perform best for polytomously scored CATs. CAT systems modeled with testlet response theory have not been examined to determine optimal exposure control procedures. This dissertation examined various exposure control procedures in testlet-based CAT systems using the three-parameter logistic testlet response theory model and the partial credit model. The exposure control procedures were the randomesque procedure, the modified within .10 logits procedure, two levels of the progressive restricted procedure, and two levels of the Sympson-Hetter procedure. Each of these was compared to a baseline no exposure control procedure, maximum information. The testlets were reading passages with six to ten multiple-choice items. The CAT systems consisted of maximum information testlet selection contingent on an exposure control procedure and content balancing for passage type and the number of items per passage; expected a posteriori ability estimation; and a fixed length stopping rule of seven testlets totaling fifty multiple-choice items. 
Measurement precision and exposure rates were examined to evaluate the effectiveness of the exposure control procedures for each measurement model. The exposure control procedures yielded similar results for measurement precision within the models. The exposure rates distinguished which exposure control procedures were most effective. The Sympson-Hetter conditions, which are conditional procedures, maintained the pre-specified maximum exposure rate, but performed very poorly in terms of pool utilization. The randomization procedures, randomesque and modified within .10 logits, yielded low maximum exposure rates, but used only about 70\% of the testlet pool. Surprisingly, the progressive restricted procedure, which is a combination of both a conditional and randomization procedure, yielded the best results in its ability to maintain and control the maximum exposure rate and it used the entire testlet pool. The progressive restricted conditions were the optimal procedures for both the partial credit CAT systems and the three-parameter logistic testlet response theory CAT systems. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Boyd, Aimee Michelle} } @conference {830, title = {A study of multiple stage adaptive test designs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2004}, note = {{PDF file, 288 KB}}, address = {San Diego CA}, author = {Armstrong, R. D. and Edmonds, J.} } @article {718, title = {Test difficulty and stereotype threat on the GRE General Test}, journal = {Journal of Applied Social Psychology}, volume = {34(3)}, year = {2004}, pages = {563-597}, author = {Stricker, L. J., and Bejar, I. I.} } @article {244, title = {Testing vocabulary knowledge: Size, strength, and computer adaptiveness}, journal = {Language Learning}, volume = {54}, number = {3}, year = {2004}, note = {References .Blackwell Publishing, United Kingdom}, month = {Sep}, pages = {399-436}, abstract = {(from the journal abstract) In this article, we describe the development and trial of a bilingual computerized test of vocabulary size, the number of words the learner knows, and strength, a combination of four aspects of knowledge of meaning that are assumed to constitute a hierarchy of difficulty: passive recognition (easiest), active recognition, passive recall, and active recall (hardest). The participants were 435 learners of English as a second language. We investigated whether the above hierarchy was valid and which strength modality correlated best with classroom language performance. Results showed that the hypothesized hierarchy was present at all word frequency levels, that passive recall was the best predictor of classroom language performance, and that growth in vocabulary knowledge was different for the different strength modalities. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Laufer, B. and Goldstein, Z.} } @inbook {1742, title = {Understanding computerized adaptive testing: From Robbins-Munro to Lord and beyond}, year = {2004}, address = {D. Kaplan (Ed.), The Sage handbook of quantitative methodology for the social sciences (pp. 117-133). 
New York: Sage.}, author = {Chang, Hua-Hua} } @article {278, title = {Using patterns of summed scores in paper-and-pencil tests and computer-adaptive tests to detect misfitting item score patterns}, journal = {Journal of Educational Measurement}, volume = {41}, number = {2}, year = {2004}, pages = {119-136}, abstract = {Two new methods have been proposed to determine unexpected sum scores on subtests (testlets) both for paper-and-pencil tests and computer adaptive tests. A method based on a conservative bound using the hypergeometric distribution, denoted ρ, was compared with a method where the probability for each score combination was calculated using a highest density region (HDR). Furthermore, these methods were compared with the standardized log-likelihood statistic with and without a correction for the estimated latent trait value (denoted as l-super(*)-sub(z) and l-sub(z), respectively). Data were simulated on the basis of the one-parameter logistic model, and both parametric and nonparametric logistic regression was used to obtain estimates of the latent trait. Results showed that it is important to take the trait level into account when comparing subtest scores. In a nonparametric item response theory (IRT) context, on adapted version of the HDR method was a powerful alterative to ρ. In a parametric IRT context, results showed that l-super(*)-sub(z) had the highest power when the data were simulated conditionally on the estimated latent trait level. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Computer Assisted Testing, Item Response Theory, person Fit, Test Scores}, author = {Meijer, R. R.} } @article {2138, title = {Using Set Covering with Item Sampling to Analyze the Infeasibility of Linear Programming Test Assembly Models}, journal = {Applied Psychological Measurement}, volume = {28}, number = {5}, year = {2004}, pages = {355-375}, abstract = {

This article shows how set covering with item sampling (SCIS) methods can be used in the analysis and preanalysis of linear programming models for test assembly (LPTA). LPTA models can construct tests, fulfilling a set of constraints set by the test assembler. Sometimes, no solution to the LPTA model exists. The model is then said to be infeasible. Causes of infeasibility can be difficult to find. A method is proposed that constitutes a helpful tool for test assemblers to detect infeasibility beforehand and, in the case of infeasibility, give insight into its causes. This method is based on SCIS. Although SCIS can help to detect feasibility or infeasibility, its power lies in pinpointing causes of infeasibility such as irreducible infeasible sets of constraints. Methods to resolve infeasibility are also given, minimizing the model deviations. A simulation study is presented, offering a guide to test assemblers to analyze and solve infeasibility.

}, doi = {10.1177/0146621604266152}, url = {http://apm.sagepub.com/content/28/5/355.abstract}, author = {Huitzing, Hiddo A.} } @article {2084, title = {Validating the German computerized adaptive test for anxiety on healthy sample (A-CAT)}, journal = {Quality of Life Research}, volume = {13}, year = {2004}, pages = {1515}, author = {Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kocalevent, R. D. and Schmid, G. and Klapp, B. F. and Rose, M.} } @conference {1010, title = {Accuracy of reading and mathematics ability estimates under the shadow-test constraint MCAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {Li, Y. H. and Schafer, W. D.} } @booklet {127, title = {An adaptation of stochastic curtailment to truncate Wald{\textquoteright}s SPRT in computerized adaptive testing}, number = {CSE Report 606}, year = {2003}, month = {Steptember, 2003}, pages = {1-26}, publisher = {National Center for Research on Evaluation, Standards, and Student Testing}, address = {Los Angeles}, abstract = {

Computerized adaptive testing (CAT) has been shown to increase efficiency in educational measurement. One common application of CAT is to classify students as either proficient or not proficient in ability. A truncated form of Wald{\textquoteright}s sequential probability ratio test (SPRT), in which examination is halted after a prespecified number of questions, has been proposed to provide a diagnosis of proficiency. This article studies the further truncation provided by stochastic curtailment, where an exam is stopped early if completion of the remaining questions would be unlikely to alter the classification of the examinee. In a simulation study presented, the increased truncation is shown to offer substantial improvement in test length with only a slight decrease in accuracy.

}, author = {Finkelman, M.} } @inbook {1818, title = {Adaptive exploration of assessment results under uncertainty}, year = {2003}, address = {Proceedings 3rd IEEE ternational Conference on Advanced Learning Technologies, ICALT {\textquoteright}03,~460-461, 2003.}, author = {Lamboudis, D. and Economides, A. A. and Papastergiou, A.} } @conference {1151, title = {An adaptive exposure control algorithm for computerized adaptive testing using a sharing item response theory model}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 191 KB}}, address = {Chicago IL}, author = {Segall, D. O.} } @article {744, title = {Alpha-stratified adaptive testing with large numbers of content constraints}, journal = {Applied Psychological Measurement}, volume = {27}, year = {2003}, pages = {107-120}, author = {van der Linden, W. J. and Chang, Hua-Hua} } @conference {832, title = {The assembly of multiple form structures}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 418 KB}}, address = {Chicago IL}, author = {Armstrong, R. D. and Little, J.} } @booklet {1312, title = {The assembly of multiple stage adaptive tests with discrete items}, year = {2003}, address = {Newtown, PA: Law School Admission Council Report}, author = {Armstrong, R. D. and Edmonds, J.J.} } @conference {876, title = {Assessing CAT security breaches by the item pooling index}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Chang, Hua-Hua and Zhang, J.} } @inbook {42, title = {Assessing question banks}, booktitle = {Reusing online resources: A sustanable approach to e-learning}, number = {1}, year = {2003}, pages = {171-230}, publisher = {Kogan Page Ltd.}, organization = {Kogan Page Ltd.}, address = {London, UK}, abstract = {In Chapter 14, Joanna Bull and James Daziel provide a comprehensive treatment of the issues surrounding the use of Question Banks and Computer Assisted Assessment, and provide a number of excellent examples of implementations. In their review of the technologies employed in Computer Assisted Assessment the authors include Computer Adaptive Testing and data generation. The authors reveal significant issues involving the impact of Intellectual Property rights and computer assisted assessment and make important suggestions for strategies to overcome these obstacles. (PsycINFO Database Record (c) 2005 APA )http://www-jime.open.ac.uk/2003/1/ (journal abstract)}, keywords = {Computer Assisted Testing, Curriculum Based Assessment, Education, Technology computerized adaptive testing}, author = {Bull, J. and Dalziel, J. and Vreeland, T.} } @conference {1255, title = {Assessing the efficiency of item selection in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 96 KB}}, address = {Chicago IL}, author = {Weissman, A.} } @article {2133, title = {a-Stratified multistage CAT design with content-blocking}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {56}, year = {2003}, pages = {359{\textendash}378}, author = {Yi, Q. and Chang, H.-H.} } @inbook {1923, title = {Bayesian checks on outlying response times in computerized adaptive testing}, year = {2003}, address = {H. Yanai, A. Okada, K. Shigemasu, Y. 
Kano, Y. and J. J. Meulman, (Eds.), New developments in psychometrics (pp. 215-222). New York: Springer-Verlag.}, author = {van der Linden, W. J.} } @article {275, title = {A Bayesian method for the detection of item preknowledge in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {27}, number = {2}, year = {2003}, pages = {121-137}, abstract = {With the increased use of continuous testing in computerized adaptive testing, new concerns about test security have evolved, such as how to ensure that items in an item pool are safeguarded from theft. In this article, procedures to detect test takers using item preknowledge are explored. When test takers use item preknowledge, their item responses deviate from the underlying item response theory (IRT) model, and estimated abilities may be inflated. This deviation may be detected through the use of person-fit indices. A Bayesian posterior log odds ratio index is proposed for detecting the use of item preknowledge. In this approach to person fit, the estimated probability that each test taker has preknowledge of items is updated after each item response. These probabilities are based on the IRT parameters, a model specifying the probability that each item has been memorized, and the test taker{\textquoteright}s item responses. Simulations based on an operational computerized adaptive test (CAT) pool are used to demonstrate the use of the odds ratio index. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Cheating, Computer Assisted Testing, Individual Differences computerized adaptive testing, Item, Item Analysis (Statistical), Mathematical Modeling, Response Theory}, author = {McLeod, L. and Lewis, C. and Thissen, D.} } @article {641, title = {A Bayesian method for the detection of item preknowledge in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {27}, year = {2003}, pages = {2, 121-137}, author = {McLeod L. D., Lewis, C., and Thissen, D.} } @conference {1152, title = {Calibrating CAT item pools and online pretest items using MCMC methods}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 155 KB}}, address = {Chicago IL}, author = {Segall, D. O.} } @conference {1094, title = {Calibrating CAT pools and online pretest items using marginal maximum likelihood methods}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 284 KB}}, address = {Chicago IL}, author = {Pommerich, M and Segall, D. O.} } @conference {989, title = {Calibrating CAT pools and online pretest items using nonparametric and adjusted marginal maximum likelihood methods}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 128 K}, address = {Chicago IL}, author = {Krass, I. A. and Williams, B.} } @article {30, title = {Calibration of an item pool for assessing the burden of headaches: an application of item response theory to the Headache Impact Test (HIT)}, journal = {Quality of Life Research}, volume = {12}, number = {8}, year = {2003}, note = {0962-9343Journal Article}, pages = {913-933}, abstract = {BACKGROUND: Measurement of headache impact is important in clinical trials, case detection, and the clinical monitoring of patients. 
Computerized adaptive testing (CAT) of headache impact has potential advantages over traditional fixed-length tests in terms of precision, relevance, real-time quality control and flexibility. OBJECTIVE: To develop an item pool that can be used for a computerized adaptive test of headache impact. METHODS: We analyzed responses to four well-known tests of headache impact from a population-based sample of recent headache sufferers (n = 1016). We used confirmatory factor analysis for categorical data and analyses based on item response theory (IRT). RESULTS: In factor analyses, we found very high correlations between the factors hypothesized by the original test constructors, both within and between the original questionnaires. These results suggest that a single score of headache impact is sufficient. We established a pool of 47 items which fitted the generalized partial credit IRT model. By simulating a computerized adaptive health test we showed that an adaptive test of only five items had a very high concordance with the score based on all items and that different worst-case item selection scenarios did not lead to bias. CONCLUSION: We have established a headache impact item pool that can be used in CAT of headache impact.}, keywords = {*Cost of Illness, *Decision Support Techniques, *Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Disability Evaluation, Factor Analysis, Statistical, Headache/*psychology, Health Surveys, Human, Longitudinal Studies, Middle Aged, Migraine/psychology, Models, Psychological, Psychometrics/*methods, Quality of Life/*psychology, Software, Support, Non-U.S. Gov{\textquoteright}t}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E., Jr.} } @article {239, title = {Can an item response theory-based pain item bank enhance measurement precision?}, journal = {Clinical Therapeutics}, volume = {25}, number = {Suppl D}, year = {2003}, pages = {D34-D36}, author = {Lai, J-S. and Dineen, K. and Cella, D. and Von Roenn, J.} } @conference {1054, title = {Can We Assess Pre-K Kids With Computer-Based Tests: STAR Early Literacy Data}, booktitle = {Presentation to the 33rd Annual National Conference on Large-Scale Assessment.}, year = {2003}, address = {San Antonio TX}, author = {J. R. McBride} } @booklet {1608, title = {CAT-ASVAB prototype Internet delivery system: Final report (FR-03-06)}, year = {2003}, note = {{PDF file, 393 KB}}, address = {Arlington VA: Human Resources Research Organization}, author = {Sticha, P. J. and Barber, G.} } @conference {928, title = {Cognitive CAT in foreign language assessment}, booktitle = {Proceedings 11th International PEG Conference}, year = {2003}, address = {Powerful ICT Tools for Learning and Teaching, PEG {\textquoteright}03, CD-ROM, 2003}, author = {Giouroglou, H. and Economides, A. A.} } @article {63, title = {A comparative study of item exposure control methods in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {40}, number = {1}, year = {2003}, pages = {71-103}, abstract = {This study compared the properties of five methods of item exposure control within the purview of estimating examinees{\textquoteright} abilities in a computerized adaptive testing (CAT) context. Each exposure control algorithm was incorporated into the item selection procedure and the adaptive testing progressed based on the CAT design established for this study. 
The merits and shortcomings of these strategies were considered under different item pool sizes and different desired maximum exposure rates and were evaluated in light of the observed maximum exposure rates, the test overlap rates, and the conditional standard errors of measurement. Each method had its advantages and disadvantages, but no one possessed all of the desired characteristics. There was a clear and logical trade-off between item exposure control and measurement precision. The M. L. Stocking and C. Lewis conditional multinomial procedure and, to a slightly lesser extent, the T. Davey and C. G. Parshall method seemed to be the most promising considering all of the factors that this study addressed. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Educational, Item Analysis (Statistical), Measurement, Strategies computerized adaptive testing}, author = {Chang, S-W. and Ansley, T. N.} } @conference {855, title = {A comparison of exposure control procedures in CAT systems based on different measurement models for testlets using the verbal reasoning section of the MCAT}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 405 KB}}, address = {Chicago IL}, author = {Boyd, A. M and Dodd, B. G. and Fitzpatrick, S. J.} } @conference {860, title = {A comparison of item exposure control procedures using a CAT system based on the generalized partial credit model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 265 KB}}, address = {Chicago IL}, author = {Burt, W. M and Kim, S.-J and Davis, L. L. and Dodd, B. G.} } @conference {901, title = {A comparison of learning potential results at various educational levels}, booktitle = {Paper presented at the 6th Annual Society for Industrial and Organisational Psychology of South Africa (SIOPSA) conference}, year = {2003}, note = {{PDF file, 391 KB}}, address = {25-27 June 2003}, author = {De Beer, M.} } @conference {1132, title = {Comparison of multi-stage tests with computer adaptive and paper and pencil tests}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 695 KB}}, address = {Chicago IL}, author = {Rotou, O. and Patsula, L. and Steffen, M. and Rizavi, S.} } @article {2129, title = {A computer adaptive testing simulation applied to the FIM instrument motor component}, journal = {Arch Phys Med Rehabil}, volume = {84}, number = {3}, year = {2003}, pages = {384-393}, author = {Dijkers, M.P.} } @article {268, title = {Computer-adaptive test for measuring personality factors using item response theory}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {64}, number = {2-B}, year = {2003}, pages = {999}, abstract = {The aim of the present research was to develop a computer adaptive test with the graded response model to measure the Five Factor Model of personality attributes. In the first of three studies, simulated items and simulated examinees were used to investigate systematically the impact of several variables on the accuracy and efficiency of a computer adaptive test. Item test banks containing more items, items with greater trait discrimination, and more response options resulted in increased accuracy and efficiency of the computer adaptive test. 
It was also found that large stopping rule values required fewer items before stopping but had less accuracy compared to smaller stopping rule values. This demonstrated a trade-off between accuracy and efficiency such that greater measurement accuracy can be obtained at a cost of decreased test efficiency. In the second study, the archival responses of 501 participants to five 30-item test banks measuring the Five Factor Model of personality were utilized in simulations of a computer adaptive personality test. The computer adaptive test estimates of participant trait scores were highly correlated with the item response theory trait estimates, and the magnitude of the correlation was related directly to the stopping rule value with higher correlations and less measurement error being associated with smaller stopping rule values. It was also noted that the performance of the computer adaptive test was dependent on the personality factor being measured whereby Conscientiousness required the most number of items to be administered and Neuroticism required the least. The results confirmed that a simulated computer adaptive test using archival personality data could accurately and efficiently attain trait estimates. In the third study, 276 student participants selected response options with a click of a mouse in a computer adaptive personality test (CAPT) measuring the Big Five factors of the Five Factor Model of personality structure. Participant responses to alternative measures of the Big Five were also collected using conventional paper-and-pencil personality questionnaires. It was found that the CAPT obtained trait estimates that were very accurate even with very few administered items. Similarly, the CAPT trait estimates demonstrated moderate to high concurrent validity with the alternative Big Five measures, and the strength of the estimates varied as a result of the similarity of the personality items and assessment methodology. It was also found that the computer adaptive test was accurately able to detect, with relatively few items, the relations between the measured personality traits and several socially interesting variables such as smoking behavior, alcohol consumption rating, and number of dates per month. Implications of the results of this research are discussed in terms of the utility of computer adaptive testing of personality characteristics. As well, methodological limitations of the studies are noted and directions for future research are considered. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Macdonald, Paul Lawrence} } @article {349, title = {Computerized adaptive rating scales for measuring managerial performance}, journal = {International Journal of Selection and Assessment}, volume = {11}, number = {2-3}, year = {2003}, pages = {237-246}, abstract = {Computerized adaptive rating scales (CARS) had been developed to measure contextual or citizenship performance. This rating format used a paired-comparison protocol, presenting pairs of behavioral statements scaled according to effectiveness levels, and an iterative item response theory algorithm to obtain estimates of ratees{\textquoteright} citizenship performance (W. C. Borman et al, 2001). In the present research, we developed CARS to measure the entire managerial performance domain, including task and citizenship performance, thus addressing a major limitation of the earlier CARS. 
The paper describes this development effort, including an adjustment to the algorithm that reduces substantially the number of item pairs required to obtain almost as much precision in the performance estimates. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Algorithms, Associations, Citizenship, Computer Assisted Testing, Construction, Contextual, Item Response Theory, Job Performance, Management, Management Personnel, Rating Scales, Test}, author = {Schneider, R. J. and Goff, M. and Anderson, S. and Borman, W. C.} } @inbook {1869, title = {Computerized adaptive testing}, year = {2003}, address = {R. Fern{\'a}ndez-Ballesteros (Ed.): Encyclopaedia of Psychological Assessment. London: Sage.}, author = {Ponsoda, V. and Olea, J.} } @conference {1003, title = {Computerized adaptive testing: A comparison of three content balancing methods}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 227 KB}}, address = {Chicago IL}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T. and Wen. Z.} } @article {251, title = {Computerized adaptive testing: A comparison of three content balancing methods}, journal = {The Journal of Technology, Learning and Assessment}, volume = {2}, number = {5}, year = {2003}, pages = {1-15}, abstract = {Content balancing is often a practical consideration in the design of computerized adaptive testing (CAT). This study compared three content balancing methods, namely, the constrained CAT (CCAT), the modified constrained CAT (MCCAT), and the modified multinomial model (MMM), under various conditions of test length and target maximum exposure rate. Results of a series of simulation studies indicate that there is no systematic effect of content balancing method in measurement efficiency and pool utilization. However, among the three methods, the MMM appears to consistently over-expose fewer items.}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @article {505, title = {Computerized adaptive testing using the nearest-neighbors criterion}, journal = {Applied Psychological Measurement}, volume = {27}, year = {2003}, pages = {204-216}, author = {Cheng, P. E. and Liou, M.} } @article {75, title = {Computerized adaptive testing using the nearest-neighbors criterion}, journal = {Applied Psychological Measurement}, volume = {27}, number = {3}, year = {2003}, pages = {204-216}, abstract = {Item selection procedures designed for computerized adaptive testing need to accurately estimate every taker{\textquoteright}s trait level (θ) and, at the same time, effectively use all items in a bank. Empirical studies showed that classical item selection procedures based on maximizing Fisher or other related information yielded highly varied item exposure rates; with these procedures, some items were frequently used whereas others were rarely selected. In the literature, methods have been proposed for controlling exposure rates; they tend to affect the accuracy in θ estimates, however. A modified version of the maximum Fisher information (MFI) criterion, coined the nearest neighbors (NN) criterion, is proposed in this study. The NN procedure improves to a moderate extent the undesirable item exposure rates associated with the MFI criterion and keeps sufficient precision in estimates. 
The NN criterion will be compared with a few other existing methods in an empirical study using the mean squared errors in θ estimates and plots of item exposure rates associated with different distributions. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {(Statistical), Adaptive Testing, Computer Assisted Testing, Item Analysis, Item Response Theory, Statistical Analysis, Statistical Estimation computerized adaptive testing, Statistical Tests}, author = {Cheng, P. E. and Liou, M.} } @article {156, title = {Computerized adaptive testing with item cloning}, journal = {Applied Psychological Measurement}, volume = {27}, number = {4}, year = {2003}, note = {References .Sage Publications, US}, pages = {247-261}, abstract = {(from the journal abstract) To increase the number of items available for adaptive testing and reduce the cost of item writing, the use of techniques of item cloning has been proposed. An important consequence of item cloning is possible variability between the item parameters. To deal with this variability, a multilevel item response (IRT) model is presented which allows for differences between the distributions of item parameters of families of item clones. A marginal maximum likelihood and a Bayesian procedure for estimating the hyperparameters are presented. In addition, an item-selection procedure for computerized adaptive testing with item cloning is presented which has the following two stages: First, a family of item clones is selected to be optimal at the estimate of the person parameter. Second, an item is randomly selected from the family for administration. Results from simulation studies based on an item pool from the Law School Admission Test (LSAT) illustrate the accuracy of these item pool calibration and adaptive testing procedures. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Glas, C. A. W. and van der Linden, W. J.} } @conference {1213, title = {Constraining item exposure in computerized adaptive testing with shadow tests}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {$\#$vdLI03-02}, address = {Chicago IL}, author = {van der Linden, W. J. and Veldkamp, B. P.} } @conference {829, title = {Constructing rotating item pools for constrained adaptive testing}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 395 KB}}, address = {Chicago IL}, author = {Ariel, A. and Veldkamp, B. and van der Linden, W. J.} } @conference {1215, title = {Controlling item exposure and item eligibility in computerized adaptive testing}, year = {2003}, author = {van der Linden, W. J. and Veldkamp, B. P.} } @conference {1140, title = {Criterion item characteristic curve function for evaluating the differential weight procedure adjusted to on-line item calibration}, booktitle = {Paper presented at the annual meeting of the NCME}, year = {2003}, address = {Chicago IL}, author = {Samejima, F.} } @article {34, title = {Developing an initial physical function item bank from existing sources}, journal = {Journal of Applied Measurement}, volume = {4}, number = {2}, year = {2003}, note = {1529-7713Journal Article}, pages = {124-36}, abstract = {The objective of this article is to illustrate incremental item banking using health-related quality of life data collected from two samples of patients receiving cancer treatment. 
The kinds of decisions one faces in establishing an item bank for computerized adaptive testing are also illustrated. Pre-calibration procedures include: identifying common items across databases; creating a new database with data from each pool; reverse-scoring "negative" items; identifying rating scales used in items; identifying pivot points in each rating scale; pivot anchoring items at comparable rating scale categories; and identifying items in each instrument that measure the construct of interest. A series of calibrations were conducted in which a small proportion of new items were added to the common core and misfitting items were identified and deleted until an initial item bank has been developed.}, keywords = {*Databases, *Sickness Impact Profile, Adaptation, Psychological, Data Collection, Humans, Neoplasms/*physiopathology/psychology/therapy, Psychometrics, Quality of Life/*psychology, Research Support, U.S. Gov{\textquoteright}t, P.H.S., United States}, author = {Bode, R. K. and Cella, D. and Lai, J. S. and Heinemann, A. W.} } @article {517, title = {Development and psychometric evaluation of the Flexilevel Scale of Shoulder Function (FLEX-SF)}, journal = {Medical Care (in press)}, year = {2003}, note = {$\#$CO03-01}, author = {Cook, K. F. and Roddey, T. S. and Gartsman, G M and Olson, S L} } @conference {902, title = {Development of the Learning Potential Computerised Adaptive Test (LPCAT)}, booktitle = {Unpublished manuscript. }, year = {2003}, note = {{PDF file, 563 KB}}, author = {De Beer, M.} } @article {359, title = {Development, reliability, and validity of a computerized adaptive version of the Schedule for Nonadaptive and Adaptive Personality}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {63}, number = {7-B}, year = {2003}, pages = {3485}, abstract = {Computerized adaptive testing (CAT) and Item Response Theory (IRT) techniques were applied to the Schedule for Nonadaptive and Adaptive Personality (SNAP) to create a more efficient measure with little or no cost to test reliability or validity. The SNAP includes 15 factor analytically derived and relatively unidimensional traits relevant to personality disorder. IRT item parameters were calibrated on item responses from a sample of 3,995 participants who completed the traditional paper-and-pencil (P\&P) SNAP in a variety of university, community, and patient settings. Computerized simulations were conducted to test various adaptive testing algorithms, and the results informed the construction of the CAT version of the SNAP (SNAP-CAT). A validation study of the SNAP-CAT was conducted on a sample of 413 undergraduates who completed the SNAP twice, separated by one week. Participants were randomly assigned to one of four groups who completed (1) a modified P\&P version of the SNAP (SNAP-PP) twice (n = 106), (2) the SNAP-PP first and the SNAP-CAT second (n = 105), (3) the SNAP-CAT first and the SNAP-PP second (n = 102), and (4) the SNAP-CAT twice (n = 100). Results indicated that the SNAP-CAT was 58\% and 60\% faster than the traditional P\&P version, at Times 1 and 2, respectively, and mean item savings across scales were 36\% and 37\%, respectively. These savings came with minimal cost to reliability or validity, and the two test forms were largely equivalent. 
Descriptive statistics, rank-ordering of scores, internal factor structure, and convergent/discriminant validity were highly comparable across testing modes and methods of scoring, and very few differences between forms replicated across testing sessions. In addition, participants overwhelmingly preferred the computerized version to the P\&P version. However, several specific problems were identified for the Self-harm and Propriety scales of the SNAP-CAT that appeared to be broadly related to IRT calibration difficulties. Reasons for these anomalous findings are discussed, and follow-up studies are suggested. Despite these specific problems, the SNAP-CAT appears to be a viable alternative to the traditional P\&P SNAP. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, author = {Simms, L. J.} } @booklet {1340, title = {Effect of extra time on GRE{\textregistered} Quantitative and Verbal Scores (Research Report 03-13)}, year = {2003}, note = {{PDF file, 88 KB}}, address = {Princeton NJ: Educational Testing Service}, author = {Bridgeman, B. and Cline, F. and Hessinger, J.} } @conference {1012, title = {The effect of item selection method on the variability of CAT{\textquoteright}s ability estimates when item parameters are contaminated with measurement errors}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 275 KB}}, address = {Chicago IL}, author = {Li, Y. H. and Schafer, W. D.} } @booklet {1413, title = {The effects of model misfit in computerized classification test}, year = {2003}, note = {{PDF file, 432 KB}}, address = {Paper presented at the annual meeting of the National Council on Measurement in Education, Chicago IL}, author = {Jiao, H. and Lau, A. C.} } @article {213, title = {The effects of model specification error in item response theory-based computerized classification test using sequential probability ratio test}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {64}, number = {2-A}, year = {2003}, pages = {478}, abstract = {This study investigated the effects of model specification error on classification accuracy, error rates, and average test length in Item Response Theory (IRT) based computerized classification test (CCT) using sequential probability ratio test (SPRT) in making binary decisions from examinees{\textquoteright} dichotomous responses. This study consisted of three sub-studies. In each sub-study, one of the three unidimensional dichotomous IRT models, the 1-parameter logistic (1PL), the 2-parameter logistic (2PL), and the 3-parameter logistic (3PL) model was set as the true model and the other two models were treated as the misfit models. Item pool composition, test length, and stratum depth were manipulated to simulate different test conditions. To ensure the validity of the study results, the true model based CCTs using the true and the recalibrated item parameters were compared first to study the effect of estimation error in item parameters in CCTs. Then, the true model and the misfit model based CCTs were compared to accomplish the research goal. The results indicated that estimation error in item parameters did not affect classification results based on CCTs using SPRT. The effect of model specification error depended on the true model, the misfit model, and the item pool composition. 
When the 1PL or the 2PL IRT model was the true model, the use of another IRT model had little impact on the CCT results. When the 3PL IRT model was the true model, the use of the 1PL model raised the false positive error rates. The influence of using the 2PL instead of the 3PL model depended on the item pool composition. When the item discrimination parameters varied greatly from uniformity of one, the use of the 2PL IRT model raised the false negative error rates to above the nominal level. In the simulated test conditions with test length and item exposure constraints, using a misfit model in CCTs most often affected the average test length. Its effects on error rates and classification accuracy were negligible. It was concluded that in CCTs using SPRT, IRT model selection and evaluation is indispensable. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Jiao, H.} } @conference {1280, title = {Effects of test administration mode on item parameter estimates}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 233 KB}}, address = {Chicago IL}, author = {Yi, Q. and Harris, D. J. and Wang, T. and Ban, J-C.} } @conference {1024, title = {Evaluating a new approach to detect aberrant responses in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {Estimation of Ability Level by Using Only Observable Quantities in Adaptive Testing.}, address = {Chicago IL}, author = {Lu, Y., and Robin, F.} } @conference {1291, title = {Evaluating computer-based test security by generalized item overlap rates}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {Zhang, J. and Lu, T.} } @conference {1022, title = {Evaluating computerized adaptive testing design for the MCAT with realistic simulated data}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 985 KB}}, address = {Chicago IL}, author = {Lu, Y., and Pitoniak, M. and Rizavi, S. and Way, W. D. and Steffan, M.} } @conference {1201, title = {Evaluating stability of online item calibrations under varying conditions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Thomasson, G. L.} } @conference {1096, title = {Evaluating the comparability of English- and French-speaking examinees on a science achievement test administered using two-stage testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 568 K}, address = {Chicago IL}, author = {Puhan, G. and Gierl, M.} } @conference {1966, title = {The evaluation of exposure control procedures for an operational CAT. }, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {2003}, address = {Chicago IL}, author = {French, B. F. and Thompson, T. 
T.} } @article {96, title = {An examination of exposure control and content balancing restrictions on item selection in CATs using the partial credit model}, journal = {Journal of Applied Measurement}, volume = {4}, number = {1}, year = {2003}, note = {1529-7713Journal Article}, pages = {24-42}, abstract = {The purpose of the present investigation was to systematically examine the effectiveness of the Sympson-Hetter technique and rotated content balancing relative to no exposure control and no content rotation conditions in a computerized adaptive testing system (CAT) based on the partial credit model. A series of simulated fixed and variable length CATs were run using two data sets generated to multiple content areas for three sizes of item pools. The 2 (exposure control) X 2 (content rotation) X 2 (test length) X 3 (item pool size) X 2 (data sets) yielded a total of 48 conditions. Results show that while both procedures can be used with no deleterious effect on measurement precision, the gains in exposure control, pool utilization, and item overlap appear quite modest. Difficulties involved with setting the exposure control parameters in small item pools make questionable the utility of the Sympson-Hetter technique with similar item pools.}, keywords = {*Computers, *Educational Measurement, *Models, Theoretical, Automation, Decision Making, Humans, Reproducibility of Results}, author = {Davis, L. L. and Pastor, D. A. and Dodd, B. G. and Chiang, C. and Fitzpatrick, S. J.} } @conference {1029, title = {Exposure control using adaptive multi-stage item bundles}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 116 KB}}, address = {Chicago IL}, author = {Luecht, RM} } @conference {260, title = {Exposure control using adaptive multi-stage item bundles}, booktitle = {annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago, IL. USA}, author = {Luecht, RM} } @article {31, title = {The feasibility of applying item response theory to measures of migraine impact: a re-analysis of three clinical studies}, journal = {Quality of Life Research}, volume = {12}, number = {8}, year = {2003}, note = {0962-9343Journal Article}, pages = {887-902}, abstract = {BACKGROUND: Item response theory (IRT) is a powerful framework for analyzing multiitem scales and is central to the implementation of computerized adaptive testing. OBJECTIVES: To explain the use of IRT to examine measurement properties and to apply IRT to a questionnaire for measuring migraine impact--the Migraine Specific Questionnaire (MSQ). METHODS: Data from three clinical studies that employed the MSQ-version 1 were analyzed by confirmatory factor analysis for categorical data and by IRT modeling. RESULTS: Confirmatory factor analyses showed very high correlations between the factors hypothesized by the original test constructions. Further, high item loadings on one common factor suggest that migraine impact may be adequately assessed by only one score. IRT analyses of the MSQ were feasible and provided several suggestions as to how to improve the items and in particular the response choices. Out of 15 items, 13 showed adequate fit to the IRT model. In general, IRT scores were strongly associated with the scores proposed by the original test developers and with the total item sum score. 
Analysis of response consistency showed that more than 90\% of the patients answered consistently according to a unidimensional IRT model. For the remaining patients, scores on the dimension of emotional function were less strongly related to the overall IRT scores that mainly reflected role limitations. Such response patterns can be detected easily using response consistency indices. Analysis of test precision across score levels revealed that the MSQ was most precise at one standard deviation worse than the mean impact level for migraine patients that are not in treatment. Thus, gains in test precision can be achieved by developing items aimed at less severe levels of migraine impact. CONCLUSIONS: IRT proved useful for analyzing the MSQ. The approach warrants further testing in a more comprehensive item pool for headache impact that would enable computerized adaptive testing.}, keywords = {*Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Cost of Illness, Factor Analysis, Statistical, Feasibility Studies, Female, Human, Male, Middle Aged, Migraine/*psychology, Models, Psychological, Psychometrics/instrumentation/*methods, Quality of Life/*psychology, Questionnaires, Support, Non-U.S. Gov{\textquoteright}t}, author = {Bjorner, J. B. and Kosinski, M. and Ware, J. E., Jr.} } @article {457, title = {A feasibility study of on-the-fly item generation in adaptive testing}, journal = { Journal of Technology, Learning, and Assessment}, volume = {2 }, year = {2003}, note = {{PDF file, 427 KB} }, author = {Bejar, I. I. and Lawless, R. R., and Morley, M. E., and Wagner, M. E., and Bennett R. E., and Revuelta, J.} } @conference {1216, title = {Implementing an alternative to Sympson-Hetter item-exposure control in constrained adaptive testing}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @article {745, title = {Implementing content constraints in alpha-stratified adaptive testing using a shadow test approach}, journal = {Applied Psychological Measurement}, volume = {27}, year = {2003}, pages = {107-120}, author = {van der Linden, W. J. and Chang, Hua-Hua} } @conference {1283, title = {Implementing the a-stratified method with b blocking in computerized adaptive testing with the generalized partial credit model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {$\#$YI03-01 {PDF file, 496 KB}}, address = {Chicago IL}, author = {Yi, Q. and Wang, T. and Wang, S} } @article {2169, title = {Incorporation Of Content Balancing Requirements In Stratification Designs For Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {63}, number = {2}, year = {2003}, pages = {257-270}, abstract = {

In computerized adaptive testing, the multistage a-stratified design advocates a new philosophy on pool management and item selection in which, contradictory to common practice, less discriminating items are used first. The method is effective in reducing item-overlap rate and enhancing pool utilization. This stratification method has been extended in different ways to deal with the practical issues of content constraints and the positive correlation between item difficulty and discrimination. Nevertheless, these modified designs on their own do not automatically satisfy content requirements. In this study, three stratification designs were examined in conjunction with three well developed content balancing methods. The performance of each of these nine combinational methods was evaluated in terms of their item security, measurement efficiency, and pool utilization. Results showed substantial differences in item-overlap rate and pool utilization among different methods. An optimal combination of stratification design and content balancing method is recommended.

}, doi = {10.1177/0013164403251326}, url = {http://epm.sagepub.com/content/63/2/257.abstract}, author = {Leung, Chi-Keung and Chang, Hua-Hua and Hau, Kit-Tai} } @article {250, title = {Incorporation of Content Balancing Requirements in Stratification Designs for Computerized Adaptive Testing}, journal = {Educational and Psychological Measurement}, volume = {63}, number = {2}, year = {2003}, pages = {257-70}, abstract = {Studied three stratification designs for computerized adaptive testing in conjunction with three well-developed content balancing methods. Simulation study results show substantial differences in item overlap rate and pool utilization among different methods. Recommends an optimal combination of stratification design and content balancing method. (SLD)}, keywords = {computerized adaptive testing}, author = {Leung, C-K. and Chang, Hua-Hua and Hau, K-T.} } @conference {1013, title = {Increasing the homogeneity of CAT{\textquoteright}s item-exposure rates by minimizing or maximizing varied target functions while assembling shadow tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {(PDF file, 418 K}, address = {Chicago IL}, author = {Li, Y. H. and Schafer, W. D.} } @conference {1256, title = {Information theoretic approaches to item selection}, booktitle = {Paper presented at the 13th international meeting of the Psychometric Society}, year = {2003}, address = {Sardinia, Italy}, author = {Weissman, A.} } @conference {1073, title = {Issues in maintaining scale consistency for the CAT-ASVAB}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Nicewander, W. A.} } @article {238, title = {Item banking to improve, shorten and computerize self-reported fatigue: an illustration of steps to create a core item bank from the FACIT-Fatigue Scale}, journal = {Quality of Life Research}, volume = {12}, number = {5}, year = {2003}, note = {0962-9343Journal Article}, month = {Aug}, pages = {485-501}, abstract = {Fatigue is a common symptom among cancer patients and the general population. Due to its subjective nature, fatigue has been difficult to effectively and efficiently assess. Modern computerized adaptive testing (CAT) can enable precise assessment of fatigue using a small number of items from a fatigue item bank. CAT enables brief assessment by selecting questions from an item bank that provide the maximum amount of information given a person{\textquoteright}s previous responses. This article illustrates steps to prepare such an item bank, using 13 items from the Functional Assessment of Chronic Illness Therapy Fatigue Subscale (FACIT-F) as the basis. Samples included 1022 cancer patients and 1010 people from the general population. An Item Response Theory (IRT)-based rating scale model, a polytomous extension of the Rasch dichotomous model was utilized. Nine items demonstrating acceptable psychometric properties were selected and positioned on the fatigue continuum. The fatigue levels measured by these nine items along with their response categories covered 66.8\% of the general population and 82.6\% of the cancer patients. Although the operational CAT algorithms to handle polytomously scored items are still in progress, we illustrated how CAT may work by using nine core items to measure level of fatigue. 
Using this illustration, a fatigue measure comparable to its full-length 13-item scale administration was obtained using four items. The resulting item bank can serve as a core to which will be added a psychometrically sound and operational item bank covering the entire fatigue continuum.}, keywords = {*Health Status Indicators, *Questionnaires, Adult, Fatigue/*diagnosis/etiology, Female, Humans, Male, Middle Aged, Neoplasms/complications, Psychometrics, Research Support, Non-U.S. Gov{\textquoteright}t, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sickness Impact Profile}, author = {Lai, J-S. and Crane, P. K. and Cella, D. and Chang, C-H. and Bode, R. K. and Heinemann, A. W.} } @article {94, title = {Item exposure constraints for testlets in the verbal reasoning section of the MCAT}, journal = {Applied Psychological Measurement}, volume = {27}, number = {5}, year = {2003}, pages = {335-356}, abstract = {The current study examined item exposure control procedures for testlet scored reading passages in the Verbal Reasoning section of the Medical College Admission Test with four computerized adaptive testing (CAT) systems using the partial credit model. The first system used a traditional CAT using maximum information item selection. The second used random item selection to provide a baseline for optimal exposure rates. The third used a variation of Lunz and Stahl{\textquoteright}s randomization procedure. The fourth used Luecht and Nungester{\textquoteright}s computerized adaptive sequential testing (CAST) system. A series of simulated fixed-length CATs was run to determine the optimal item length selection procedure. Results indicated that both the randomization procedure and CAST performed well in terms of exposure control and measurement precision, with the CAST system providing the best overall solution when all variables were taken into consideration. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Entrance Examinations, Item Response Theory, Random Sampling, Reasoning, Verbal Ability computerized adaptive testing}, author = {Davis, L. L. and Dodd, B. G.} } @conference {1111, title = {Item pool design for computerized adaptive tests}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 135 KB}}, address = {Chicago IL}, author = {Reckase, M. D.} } @inbook {1927, title = {Item selection in polytomous CAT}, year = {2003}, note = {$\#$VE03207 {PDF file, 79 KB}}, address = {H. Yanai, A. Okada, K. Shigemasu, Y. Kano, and J. J. Meulman (eds.), New developments in psychometrics (pp. 207-214). Tokyo, Japan: Springer-Verlag.}, author = {Veldkamp, B. P.} } @inbook {414, title = {Item selection in polytomous CAT}, booktitle = {New developments in psychometrics}, year = {2003}, pages = {207{\textendash}214}, publisher = {Psychometric Society, Springer}, organization = {Psychometric Society, Springer}, address = {Tokyo, Japan}, keywords = {computerized adaptive testing}, author = {Veldkamp, B. P.}, editor = {A. Okada and K. Shigemasu and Y. Kano and J. Meulman} } @conference {1172, title = {Maintaining scale in computer adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 367 KB}}, address = {Chicago IL}, author = {Smith, R. L. and Rizavi, S. and Paez, R. and Damiano, M. 
and Herbert, E.} } @booklet {1314, title = {A method to determine targets for multi-stage adaptive tests}, year = {2003}, note = {{PDF file, 207 KB}}, address = {Unpublished manuscript}, author = {Armstrong, R. D. and Roussos, L.} } @conference {1023, title = {Methods for item set selection in adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 443 K}, address = {Chicago IL}, author = {Lu, Y., and Rizavi, S.} } @conference {1011, title = {Multidimensional computerized adaptive testing in recovering reading and mathematics abilities}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 592 KB}}, address = {Chicago, IL}, author = {Li, Y. H. and Schafer, W. D.} } @booklet {1415, title = {A multidimensional IRT mechanism for better understanding adaptive test behavior}, year = {2003}, address = {Paper presented at the annual meeting of the National Council on Measurement in Education, Chicago IL}, author = {Jodoin, M.} } @conference {934, title = {Online calibration and scale stability of a CAT program}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 274 KB}}, address = {Chicago IL}, author = {Guo, F. and Wang, G.} } @article {805, title = {An optimal design approach to criterion-referenced computerized testing}, journal = {Journal of Educational Measurement}, volume = {28}, year = {2003}, pages = {97-100}, author = {Wiberg, M.} } @article {57, title = {Optimal stratification of item pools in α-stratified computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {27}, number = {4}, year = {2003}, pages = {262-274}, abstract = {A method based on 0-1 linear programming (LP) is presented to stratify an item pool optimally for use in α-stratified adaptive testing. Because the 0-1 LP model belongs to the subclass of models with a network flow structure, efficient solutions are possible. The method is applied to a previous item pool from the computerized adaptive testing (CAT) version of the Graduate Record Exams (GRE) Quantitative Test. The results indicate that the new method performs well in practical situations. It improves item exposure control, reduces the mean squared error in the θ estimates, and increases test reliability. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Content (Test), Item Response Theory, Mathematical Modeling, Test Construction computerized adaptive testing}, author = {Chang, Hua-Hua and van der Linden, W. J.} } @conference {912, title = {Optimal testing with easy items in computerized adaptive testing}, booktitle = {Paper presented at the conference of the International Association for Educational Assessment}, year = {2003}, address = {Manchester UK}, author = {Theo Eggen and Verschoor, A.} } @conference {883, title = {Predicting item exposure parameters in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2003}, note = {{PDF file, 239 KB}}, address = {Chicago IL}, author = {Chen, S-Y. 
and Doong, H.} } @article {680, title = {Psychometric and psychological effects of item selection and review on computerized testing}, journal = {Educational and Psychological Measurement}, volume = {63}, year = {2003}, pages = {791-808}, author = {Revuelta, J. and Xim{\'e}nez, M. C. and Olea, J.} } @article {2170, title = {Psychometric and Psychological Effects of Item Selection and Review on Computerized Testing}, journal = {Educational and Psychological Measurement}, volume = {63}, number = {5}, year = {2003}, pages = {791-808}, abstract = {

Psychometric properties of computerized testing, together with anxiety and comfort of examinees, are investigated in relation to item selection routine and the opportunity for response review. Two different hypotheses involving examinee anxiety were used to design test properties: perceived control and perceived performance. The study involved three types of administration of a computerized English test for Spanish speakers (adaptive, easy adaptive, and fixed) and four review conditions (no review, review at end, review by blocks of 5 items, and review item-by-item). These were applied to a sample of 557 first-year psychology undergraduate students to examine main and interaction effects of test type and review on psychometric and psychological variables. Statistically significant effects were found in test precision among the different types of test. Response review improved ability estimates and increased testing time. No psychological effects on anxiety were found. Examinees in all review conditions considered more important the possibility of review than those who were not allowed to review. These results concur with previous findings on examinees{\textquoteright} preference for item review and raise some issues that should be addressed in the field of tests with item review.

}, doi = {10.1177/0013164403251282}, url = {http://epm.sagepub.com/content/63/5/791.abstract}, author = {Revuelta, Javier and Xim{\'e}nez, M. Carmen and Olea, Julio} } @article {215, title = {Psychometric properties of several computer-based test designs with ideal and constrained item pools}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {64}, number = {6-B}, year = {2003}, pages = {2978}, abstract = {The purpose of this study was to compare linear fixed length test (LFT), multi stage test (MST), and computer adaptive test (CAT) designs under three levels of item pool quality, two levels of match between test and item pool content specifications, two levels of test length, and several levels of exposure control expected to be practical for a number of testing programs. This design resulted in 132 conditions that were evaluated using a simulation study with 9000 examinees on several measures of overall measurement precision including reliability, the mean error and root mean squared error between true and estimated ability levels, classification precision including decision accuracy, false positive and false negative rates, and Kappa for cut scores corresponding to 30\%, 50\%, and 85\% failure rates, and conditional measurement precision with the conditional root mean squared error between true and estimated ability levels conditioned on 25 true ability levels. Test reliability, overall and conditional measurement precision, and classification precision increased with item pool quality and test length, and decreased with less adequate match between item pool and test specification match. In addition, as the maximum exposure rate decreased and the type of exposure control implemented became more restrictive, test reliability, overall and conditional measurement precision, and classification precision decreased. Within item pool quality, match between test and item pool content specifications, test length, and exposure control, CAT designs showed superior psychometric properties as compared to MST designs which in turn were superior to LFT designs. However, some caution is warranted in interpreting these results since the ability of the automated test assembly software to construct test that met specifications was limited in conditions where pool usage was high. The practical importance of the differences between test designs on the evaluation criteria studied is discussed with respect to the inferences test users seek to make from test scores and nonpsychometric factors that may be important in some testing programs. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Jodoin, M. G.} } @conference {943, title = {Recalibration of IRT item parameters in CAT: Sparse data matrices and missing data treatments}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {(PDF file, 626 K}, address = {Chicago IL}, author = {Harmes, J. C. and Parshall, C. G. and Kromrey, J. D.} } @article {499, title = {The relationship between item exposure and test overlap in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {40}, year = {2003}, pages = {129-145}, author = {Chen, S. and Ankenmann, R. D. and Spray, J. 
A.} } @article {68, title = {The relationship between item exposure and test overlap in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {40}, number = {2}, year = {2003}, pages = {129-145}, abstract = {The purpose of this article is to present an analytical derivation for the mathematical form of an average between-test overlap index as a function of the item exposure index, for fixed-length computerized adaptive tests (CATs). This algebraic relationship is used to investigate the simultaneous control of item exposure at both the item and test levels. The results indicate that, in fixed-length CATs, control of the average between-test overlap is achieved via the mean and variance of the item exposure rates of the items that constitute the CAT item pool. The mean of the item exposure rates is easily manipulated. Control over the variance of the item exposure rates can be achieved via the maximum item exposure rate (r-sub(max)). Therefore, item exposure control methods which implement a specification of r-sub(max) (e.g., J. B. Sympson and R. D. Hetter, 1985) provide the most direct control at both the item and test levels. (PsycINFO Database Record (c) 2005 APA )}, keywords = {(Statistical), Adaptive Testing, Computer Assisted Testing, Human Computer, Interaction computerized adaptive testing, Item Analysis, Item Analysis (Test), Test Items}, author = {Chen, S-Y. and Ankenmann, R. D. and Spray, J. A.} } @booklet {1536, title = {A sequential Bayes procedure for item calibration in multi-stage testing}, year = {2003}, address = {Manuscript in preparation}, author = {van der Linden, W. J. and Alan D Mead} } @conference {1275, title = {A simulation study to compare CAT strategies for cognitive diagnosis}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 250 KB}}, address = {Chicago IL}, author = {Xu, X. and Chang, Hua-Hua and Douglas, J.} } @article {379, title = {Small sample estimation in dichotomous item response models: Effect of priors based on judgmental information on the accuracy of item parameter estimates}, journal = {Applied Psychological Measurement}, volume = {27}, number = {1}, year = {2003}, note = {Sage Publications, US}, pages = {27-51}, abstract = {Large item banks with properly calibrated test items are essential for ensuring the validity of computer-based tests. At the same time, item calibrations with small samples are desirable to minimize the amount of pretesting and limit item exposure. Bayesian estimation procedures show considerable promise with small examinee samples. The purposes of the study were (a) to examine how prior information for Bayesian item parameter estimation can be specified and (b) to investigate the relationship between sample size and the specification of prior information on the accuracy of item parameter estimates. The results of the simulation study were clear: Estimation of item response theory (IRT) model item parameters can be improved considerably. Improvements in the one-parameter model were modest; considerable improvements with the two- and three-parameter models were observed. 
Both the study of different forms of priors and ways to improve the judgmental data used in forming the priors appear to be promising directions for future research. }, author = {Swaminathan, H. and Hambleton, R. K. and Sireci, S. G. and Xing, D. and Rizavi, S. M.} } @article {397, title = {Some alternatives to Sympson-Hetter item-exposure control in computerized adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {28}, number = {3}, year = {2003}, pages = {249-265}, abstract = {The Hetter and Sympson (1997; 1985) method is a method of probabilistic item-exposure control in computerized adaptive testing. Setting its control parameters to admissible values requires an iterative process of computer simulations that has been found to be time-consuming, particularly if the parameters have to be set conditional on a realistic set of values for the examinees{\textquoteright} ability parameter. Formal properties of the method are identified that help us explain why this iterative process can be slow and does not guarantee admissibility. In addition, some alternatives to the SH method are introduced. The behavior of these alternatives was estimated for an adaptive test from an item pool from the Law School Admission Test (LSAT). Two of the alternatives showed attractive behavior and converged smoothly to admissibility for all items in a relatively small number of iteration steps. }, keywords = {Adaptive Testing, Computer Assisted Testing, Test Items computerized adaptive testing}, author = {van der Linden, W. J.} } @conference {937, title = {Standard-setting issues in computerized-adaptive testing}, booktitle = {Paper Prepared for Presentation at the Annual Conference of the Canadian Society for Studies in Education}, year = {2003}, address = {Halifax, Nova Scotia, May 30th, 2003}, author = {Gushta, M. M.} } @article {124, title = {Statistical detection and estimation of differential item functioning in computerized adaptive testing}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {64}, number = {6-B}, year = {2003}, pages = {2736}, abstract = {Differential item functioning (DIF) is an important issue in large-scale standardized testing. DIF refers to the unexpected difference in item performances among groups of equally proficient examinees, usually classified by ethnicity or gender. Its presence could seriously affect the validity of inferences drawn from a test. Various statistical methods have been proposed to detect and estimate DIF. This dissertation addresses DIF analysis in the context of computerized adaptive testing (CAT), whose item selection algorithm adapts to the ability level of each individual examinee. In a CAT, a DIF item may be more consequential and more detrimental because fewer items are administered in a CAT than in a traditional paper-and-pencil test and because the remaining sequence of items presented to examinees depends in part on their responses to the DIF item. Consequently, an efficient, stable and flexible method to detect and estimate CAT DIF becomes necessary and increasingly important. We propose simultaneous implementations of online calibration and DIF testing. The idea is to perform online calibration of an item of interest separately in the focal and reference groups. Under any specific parametric IRT model, we can use the (online) estimated latent traits as covariates and fit a nonlinear regression model to each of the two groups.
Because of the use of the estimated, not the true, latent traits, the regression fit has to adjust for the covariate "measurement errors". It turns out that this situation fits nicely into the framework of nonlinear error-in-variable modelling, which has been extensively studied in statistical literature. We develop two bias-correction methods using asymptotic expansion and conditional score theory. After correcting the bias caused by measurement error, one can perform a significance test to detect DIF with the parameter estimates for different groups. This dissertation also discusses some general techniques to handle measurement error modelling with different IRT models, including the three-parameter normal ogive model and polytomous response models. Several methods of estimating DIF are studied as well. Large sample properties are established to justify the proposed methods. Extensive simulation studies show that the resulting methods perform well in terms of Type-I error rate control, accuracy in estimating DIF and power against both unidirectional and crossing DIF. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Feng, X.} } @article {92, title = {Strategies for controlling item exposure in computerized adaptive testing with polytomously scored items}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {64}, number = {1-B}, year = {2003}, pages = {458}, abstract = {Choosing a strategy for controlling the exposure of items to examinees has become an integral part of test development for computerized adaptive testing (CAT). Item exposure can be controlled through the use of a variety of algorithms which modify the CAT item selection process. This may be done through a randomization, conditional selection, or stratification approach. The effectiveness of each procedure as well as the degree to which measurement precision is sacrificed has been extensively studied with dichotomously scored item pools. However, only recently have researchers begun to examine these procedures in polytomously scored item pools. The current study investigated the performance of six different exposure control mechanisms under three polytomous IRT models in terms of measurement precision, test security, and ease of implementation. The three models examined in the current study were the partial credit, generalized partial credit, and graded response models. In addition to a no exposure control baseline condition, the randomesque, within .10 logits, Sympson-Hetter, conditional Sympson-Hetter, a-Stratified, and enhanced a-Stratified procedures were implemented to control item exposure rates. The a-Stratified and enhanced a-Stratified procedures were not evaluated with the partial credit model. Two variations of the randomesque and within .10 logits procedures were also examined which varied the size of the item group from which the next item to be administered was randomly selected. The results of this study were remarkably similar for all three models and indicated that the randomesque and within .10 logits procedures, when implemented with the six item group variation, provide the best option for controlling exposure rates when impact to measurement precision and ease of implementation are considered. The three item group variations of the procedures were, however, ineffective in controlling exposure, overlap, and pool utilization rates to desired levels.
The Sympson-Hetter and conditional Sympson-Hetter procedures were difficult and time consuming to implement, and while they did control exposure rates to the target level, their performance in terms of item overlap (for the Sympson-Hetter) and pool utilization were disappointing. The a-Stratified and enhanced a-Stratified procedures both turned in surprisingly poor performances across all variables. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Davis, L. L.} } @conference {896, title = {Strategies for controlling item exposure in computerized adaptive testing with the generalized partial credit model}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 620 K}, address = {Chicago IL}, author = {Davis, L. L.} } @book {1650, title = {Strategies for controlling testlet exposure rates in computerized adaptive testing systems}, year = {2003}, note = {{PDF file, 485 KB}}, address = {Unpublished Ph.D. Dissertation, The University of Texas at Austin.}, author = {Boyd, A. M} } @article {193, title = {Student modeling and ab initio language learning}, journal = {System}, volume = {31}, number = {4}, year = {2003}, pages = {519-535}, abstract = {Provides examples of student modeling techniques that have been employed in computer-assisted language learning over the past decade. Describes two systems for learning German: "German Tutor" and "Geroline." Shows how a student model can support computerized adaptive language testing for diagnostic purposes in a Web-based language learning environment that does not rely on parsing technology. (Author/VWL)}, author = {Heift, T. and Schulze, M.} } @article {456, title = {A study of the feasibility of Internet administration of a computerized health survey: The Headache Impact Test (HIT)}, journal = {Quality of Life Research}, volume = { 12}, year = {2003}, pages = { 953-961}, author = {Bayliss, M.S. and Dewey, J.E. and Dunlap, I and et. al.} } @article {274, title = {Ten recommendations for advancing patient-centered outcomes measurement for older persons}, journal = {Annals of Internal Medicine}, volume = {139}, number = {5 Pt 2}, year = {2003}, note = {1539-3704Journal ArticleReview}, month = {Sep 2}, pages = {403-409}, abstract = {The past 50 years have seen great progress in the measurement of patient-based outcomes for older populations. Most of the measures now used were created under the umbrella of a set of assumptions and procedures known as classical test theory. A recent alternative for health status assessment is item response theory. Item response theory is superior to classical test theory because it can eliminate test dependency and achieve more precise measurement through computerized adaptive testing. Computerized adaptive testing reduces test administration times and allows varied and precise estimates of ability. Several key challenges must be met before computerized adaptive testing becomes a productive reality. I discuss these challenges for the health assessment of older persons in the form of 10 "Ds": things we need to deliberate, debate, decide, and do.}, keywords = {*Health Status Indicators, Aged, Geriatric Assessment/*methods, Humans, Patient-Centered Care/*methods, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S.}, author = {McHorney, C. 
A.} } @conference {1031, title = {Test information targeting strategies for adaptive multistage testlet designs}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {PDF file, 179 K}, address = {Chicago IL}, author = {Luecht, RM and Burgin, W. L.} } @booklet {1486, title = {Tests adaptativos informatizados (Computerized adaptive testing)}, year = {2003}, note = {[In Spanish]}, address = {Madrid: UNED Ediciones}, author = {Olea, J. and Ponsoda, V.} } @conference {874, title = {Test-score comparability, ability estimation, and item-exposure control in computerized adaptive testing}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, address = {Chicago IL}, author = {Chang, Hua-Hua and Ying, Z.} } @article {321, title = {Timing behavior in computerized adaptive testing: Response times for correct and incorrect answers are not related to general fluid intelligence/Zum Zeitverhalten beim computergest{\"u}tzten adaptiveb Testen: Antwortlatenzen bei richtigen und falschen L{\"o}sun}, journal = {Zeitschrift f{\"u}r Differentielle und Diagnostische Psychologie}, volume = {24}, number = {1}, year = {2003}, pages = {57-63}, abstract = {Examined the effects of general fluid intelligence on item response times for correct and false responses in computerized adaptive testing. After performing the CFT3 intelligence test, 80 individuals (aged 17-44 yrs) completed perceptual and cognitive discrimination tasks. Results show that response times were related neither to the proficiency dimension reflected by the task nor to the individual level of fluid intelligence. Furthermore, the false > correct-phenomenon as well as substantial positive correlations between item response times for false and correct responses were shown to be independent of intelligence levels. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Cognitive Ability, Intelligence, Perception, Reaction Time computerized adaptive testing}, author = {Rammsayer, Thomas and Brandler, Susanne} } @conference {904, title = {To stratify or not: An investigation of CAT item selection procedures under practical constraints}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 186 KB}}, address = {Chicago IL}, author = {Deng, H. and Ansley, T.} } @booklet {1394, title = {Using moving averages to assess test and item security in computer-based testing (Center for Educational Assessment Research Report No 468)}, year = {2003}, address = {Amherst, MA: University of Massachusetts, School of Education.}, author = {Han, N.} } @article {407, title = {Using response times to detect aberrant responses in computerized adaptive testing}, journal = {Psychometrika}, volume = {68}, number = {2}, year = {2003}, pages = {251-265}, abstract = {A lognormal model for response times is used to check response times for aberrances in examinee behavior on computerized adaptive tests. Both classical procedures and Bayesian posterior predictive checks are presented. For a fixed examinee, responses and response times are independent; checks based on response times offer thus information independent of the results of checks on response patterns. Empirical examples of the use of classical and Bayesian checks for detecting two different types of aberrances in response times are presented. 
The detection rates for the Bayesian checks outperformed those for the classical checks, but at the cost of higher false-alarm rates. A guideline for the choice between the two types of checks is offered.}, keywords = {Adaptive Testing, Behavior, Computer Assisted Testing, computerized adaptive testing, Models, person Fit, Prediction, Reaction Time}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @conference {1015, title = {Accuracy of the ability estimate and the item exposure rate under multidimensional adaptive testing with item constraints}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {$\#$LI02-01}, address = {New Orleans LA}, author = {Li, Y. H. and Yu, N. Y.} } @booklet {1583, title = {Adaptive testing without IRT in the presence of multidimensionality (Research Report 02-09)}, year = {2002}, address = {Princeton NJ: Educational Testing Service}, author = {Yan, D. and Lewis, C. and Stocking, M.} } @article {48, title = {Advances in quality of life measurements in oncology patients}, journal = {Seminars in Oncology}, volume = {29}, number = {3 Suppl 8}, year = {2002}, note = {0093-7754 (Print)Journal ArticleReview}, month = {Jun}, pages = {60-8}, abstract = {Accurate assessment of the quality of life (QOL) of patients can provide important clinical information to physicians, especially in the area of oncology. Changes in QOL are important indicators of the impact of a new cytotoxic therapy, can affect a patient{\textquoteright}s willingness to continue treatment, and may aid in defining response in the absence of quantifiable endpoints such as tumor regression. Because QOL is becoming an increasingly important aspect in the management of patients with malignant disease, it is vital that the instruments used to measure QOL are reliable and accurate. Assessment of QOL involves a multidimensional approach that includes physical, functional, social, and emotional well-being, and the most comprehensive instruments measure at least three of these domains. Instruments to measure QOL can be generic (eg, the Nottingham Health Profile), targeted toward specific illnesses (eg, Functional Assessment of Cancer Therapy - Lung), or be a combination of generic and targeted. Two of the most widely used examples of the combination, or hybrid, instruments are the European Organization for Research and Treatment of Cancer Quality of Life Questionnaire Core 30 Items and the Functional Assessment of Chronic Illness Therapy. A consequence of the increasing international collaboration in clinical trials has been the growing necessity for instruments that are valid across languages and cultures. To assure the continuing reliability and validity of QOL instruments in this regard, item response theory can be applied. Techniques such as item response theory may be used in the future to construct QOL item banks containing large sets of validated questions that represent various levels of QOL domains. As QOL becomes increasingly important in understanding and approaching the overall management of cancer patients, the tools available to clinicians and researchers to assess QOL will continue to evolve. 
While the instruments currently available provide reliable and valid measurement, further improvements in precision and application are anticipated.}, keywords = {*Quality of Life, *Sickness Impact Profile, Cross-Cultural Comparison, Culture, Humans, Language, Neoplasms/*physiopathology, Questionnaires}, author = {Cella, D. and Chang, C-H. and Lai, J. S. and Webster, K.} } @article {65, title = {Applicable adaptive testing models for school teachers}, journal = {Educational Media International}, volume = {39}, number = {1}, year = {2002}, pages = {55-59}, abstract = {The purpose of this study was to investigate the attitudinal effects of an SPRT adaptive testing environment on junior high school students. Subjects were 39 eighth graders from a selected junior high school. Major instruments for the study were the Junior High School Natural Sciences Adaptive Testing System driven by the SPRT algorithm, and a self-developed attitudinal questionnaire; factors examined included test anxiety, examinee preference, adaptability of the test, and acceptance of the test result. The major findings were that, overall, junior high school students{\textquoteright} attitudes towards computerized adaptive tests were positive, and no significant correlations existed between test attitude and the test length. The results indicated that junior high school students generally have positive attitudes towards adaptive testing.}, author = {Chang-Hwa, W. A. and Chuang, C-L.} } @article {823, title = {Application of an empirical Bayes enhancement of Mantel-Haenszel differential item functioning analysis to a computerized adaptive test}, journal = {Applied Psychological Measurement}, volume = {26}, year = {2002}, pages = {57-76}, author = {Zwick, R. and Thayer, D. T.} } @book {1722, title = {Assessing the efficiency of item selection in computerized adaptive testing}, year = {2002}, address = {Unpublished doctoral dissertation, University of Pittsburgh.}, author = {Weissman, A.} } @article {305, title = {Assessing tobacco beliefs among youth using item response theory models}, journal = {Drug and Alcohol Dependence}, volume = {68}, number = {Suppl 1}, year = {2002}, note = {0376-8716Journal Article}, month = {Nov}, pages = {S21-S39}, abstract = {Successful intervention research programs to prevent adolescent smoking require well-chosen, psychometrically sound instruments for assessing smoking prevalence and attitudes. Twelve thousand eight hundred and ten adolescents were surveyed about their smoking beliefs as part of the Teenage Attitudes and Practices Survey project, a prospective cohort study of predictors of smoking initiation among US adolescents. Item response theory (IRT) methods are used to frame a discussion of questions that a researcher might ask when selecting an optimal item set. IRT methods are especially useful for choosing items during instrument development, trait scoring, evaluating item functioning across groups, and creating optimal item subsets for use in specialized applications such as computerized adaptive testing. Data analytic steps for IRT modeling are reviewed for evaluating item quality and differential item functioning across subgroups of gender, age, and smoking status. Implications and challenges in the use of these methods for tobacco onset research and for assessing the developmental trajectories of smoking among youth are discussed.}, keywords = {*Attitude to Health, *Culture, *Health Behavior, *Questionnaires, Adolescent, Adult, Child, Female, Humans, Male, Models, Statistical, Smoking/*epidemiology}, author = {Panter, A. T. and Reeve, B. B.} } @article {761, title = {Can examinees use judgments of item difficulty to improve proficiency estimates on computerized adaptive vocabulary tests? }, journal = {Journal of Educational Measurement}, volume = {39}, year = {2002}, pages = { 311-330}, author = {Vispoel, W. P. and Clough, S. J. and Bleiler, T. and Hendrickson, A. B. and Ihrig, D.} } @conference {1002, title = {Comparing three item selection approaches for computerized adaptive testing with content balancing requirement}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 226 KB}}, address = {New Orleans LA}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @conference {1170, title = {A comparison of computer mastery models when pool characteristics vary}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 692 KB}}, address = {New Orleans LA}, author = {Smith, R. L.
and Lewis, C.} } @article {308, title = {A comparison of item selection techniques and exposure control mechanisms in CATs using the generalized partial credit model}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {147-163}, abstract = {The use of more performance items in large-scale testing has led to an increase in the research investigating the use of polytomously scored items in computer adaptive testing (CAT). Because this research has to be complemented with information pertaining to exposure control, the present research investigated the impact of using five different exposure control algorithms in two differently sized item pools calibrated using the generalized partial credit model. The results of the simulation study indicated that the a-stratified design, in comparison to a no-exposure control condition, could be used to reduce item exposure and overlap, increase pool utilization, and only minimally degrade measurement precision. Use of the more restrictive exposure control algorithms, such as the Sympson-Hetter and conditional Sympson-Hetter, controlled exposure to a greater extent but at the cost of measurement precision. Because convergence of the exposure control parameters was problematic for some of the more restrictive exposure control algorithms, use of the more simplistic exposure control mechanisms, particularly when the test length to item pool size ratio is large, is recommended. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {(Statistical), Adaptive Testing, Algorithms computerized adaptive testing, Computer Assisted Testing, Item Analysis, Item Response Theory, Mathematical Modeling}, author = {Pastor, D. A. and Dodd, B. G. and Chang, Hua-Hua} } @article {572, title = {A comparison of non-deterministic procedures for the adaptive assessment of knowledge}, journal = {Psychologische Beitr{\"a}ge}, volume = {44}, year = {2002}, pages = {495-503}, author = {Hockemeyer, C.} } @conference {959, title = {Comparison of the psychometric properties of several computer-based test designs for credentialing exams}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 261 KB}}, address = {New Orleans LA}, author = {Jodoin, M. and Zenisky, A. L. and Hambleton, R. K.} } @article {391, title = {Computer adaptive testing: The impact of test characteristics on perceived performance and test takers{\textquoteright} reactions}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {62}, number = {7-B}, year = {2002}, pages = {3410}, abstract = {This study examined the relationship between characteristics of adaptive testing and test takers{\textquoteright} subsequent reactions to the test. Participants took a computer adaptive test in which two features, the difficulty of the initial item and the difficulty of subsequent items, were manipulated. These two features of adaptive testing determined the number of items answered correctly by examinees and their subsequent reactions to the test. The data show that the relationship between test characteristics and reactions was fully mediated by perceived performance on the test. In addition, the impact of feedback on reactions to adaptive testing was also evaluated. In general, feedback that was consistent with perceptions of performance had a positive impact on reactions to the test.
Implications for adaptive test design concerning maximizing test takers{\textquoteright} reactions are discussed. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Tonidandel, S.} } @article {728, title = {Computer-adaptive testing: The impact of test characteristics on perceived performance and test takers{\textquoteright} reactions}, journal = {Journal of Applied Psychology}, volume = {87}, year = {2002}, pages = {320-332}, author = {Tonidandel, S. and Qui{\~n}ones, M. A. and Adams, A. A.} } @article {242, title = {Computerised adaptive testing}, journal = {British Journal of Educational Technology}, volume = {33}, number = {5}, year = {2002}, pages = {619-22}, abstract = {Considers the potential of computer adaptive testing (CAT). Discusses the use of CAT instead of traditional paper and pencil tests, identifies decisions that impact the efficacy of CAT, and concludes that CAT is beneficial when used to its full potential on certain types of tests. (LRW)}, keywords = {computerized adaptive testing}, author = {Latu, E. and Chapman, E.} } @conference {1150, title = {Confirmatory item factor analysis using Markov chain Monte Carlo estimation with applications to online calibration in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, address = {New Orleans, LA}, author = {Segall, D. O.} } @booklet {1539, title = {Constraining item exposure in computerized adaptive testing with shadow tests (Research Report No. 02-06)}, year = {2002}, address = {University of Twente, The Netherlands}, author = {van der Linden, W. J. and Veldkamp, B. P.} } @conference {932, title = {Content-stratified random item selection in computerized classification testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {$\#$GU02-01}, address = {New Orleans LA}, author = {Guille, R. Lipner, R. S. and Norcini, J. J.} } @inbook {1751, title = {Controlling item exposure and maintaining item security}, year = {2002}, address = {C. N. Mills, M. T. Potenza, and J. J. Fremer (Eds.), Computer-Based Testing: Building the Foundation for Future Assessments (pp. 165-191). Mahwah, NJ: Lawrence Erlbaum Associates, Inc.}, author = {Davey, T. and Nering, M.} } @article {14, title = {Data sparseness and on-line pretest item calibration-scaling methods in CAT}, journal = {Journal of Educational Measurement}, volume = {39}, number = {3}, year = {2002}, pages = {207-218}, abstract = {Compared and evaluated 3 on-line pretest item calibration-scaling methods (the marginal maximum likelihood estimate with 1 expectation maximization [EM] cycle [OEM] method, the marginal maximum likelihood estimate with multiple EM cycles [MEM] method, and M. L. Stocking{\textquoteright}s Method B) in terms of item parameter recovery when the item responses to the pretest items in the pool are sparse. Simulations of computerized adaptive tests were used to evaluate the results yielded by the three methods. The MEM method produced the smallest average total error in parameter estimation, and the OEM method yielded the largest total error (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, Educational Measurement, Item Response Theory, Maximum Likelihood, Methodology, Scaling (Testing), Statistical Data}, author = {Ban, J-C. and Hanson, B. A. and Yi, Q. and Harris, D. 
J.} } @article {411, title = {Detection of person misfit in computerized adaptive tests with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {164-180}, abstract = {Item scores that do not fit an assumed item response theory model may cause the latent trait value to be inaccurately estimated. For a computerized adaptive test (CAT) using dichotomous items, several person-fit statistics for detecting misfitting item score patterns have been proposed. Both for paper-and-pencil (P\&P) tests and CATs, detection of person misfit with polytomous items is hardly explored. In this study, the nominal and empirical null distributions of the standardized log-likelihood statistic for polytomous items are compared both for P\&P tests and CATs. Results showed that the empirical distribution of this statistic differed from the assumed standard normal distribution for both P\&P tests and CATs. Second, a new person-fit statistic based on the cumulative sum (CUSUM) procedure from statistical process control was proposed. By means of simulated data, critical values were determined that can be used to classify a pattern as fitting or misfitting. The effectiveness of the CUSUM to detect simulees with item preknowledge was investigated. Detection rates using the CUSUM were high for realistic numbers of disclosed items. }, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {1197, title = {Developing tailored instruments: Item banking and computerized adaptive assessment}, booktitle = {Paper presented at the conference {\textquotedblleft}Advances in Health Outcomes Measurement{\textquotedblright}}, year = {2002}, note = {{PDF file, 170 KB}}, address = {Bethesda, Maryland, June 23-25}, author = {Thissen, D.} } @conference {1016, title = {The development and evaluation of a computer-adaptive testing application for English language}, booktitle = {Paper presented at the 2002 Computer-Assisted Testing Conference}, year = {2002}, note = {{PDF file, 308 KB}}, address = {United Kingdom}, author = {Lilley, M. and Barker, T.} } @article {187, title = {Development of an index of physical functional health status in rehabilitation}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {83}, number = {5}, year = {2002}, note = {0003-9993 (Print)Journal Article}, month = {May}, pages = {655-65}, abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS.
The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.}, keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies}, author = {Hart, D. L. and Wright, B. D.} } @conference {1053, title = {The Development of STAR Early Literacy}, booktitle = {Presentation to the 32rd Annual National Conference on Large-Scale Assessment.}, year = {2002}, address = {Desert Springs CA}, author = {J. R. McBride} } @mastersthesis {1978, title = {DEVELOPMENT, RELIABILITY, AND VALIDITY OF A COMPUTERIZED ADAPTIVE VERSION OF THE SCHEDULE FOR NONADAPTIVE AND ADAPTIVE PERSONALITY}, year = {2002}, address = {Unpublished Ph. D. dissertation, University of Iowa, Iowa City Iowa}, author = {Simms, L. J.} } @article {335, title = {The effect of test characteristics on aberrant response patterns in computer adaptive testing}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {62}, number = {10-A}, year = {2002}, pages = {3363}, abstract = {The advantages that computer adaptive testing offers over linear tests have been well documented. The Computer Adaptive Test (CAT) design is more efficient than the Linear test design as fewer items are needed to estimate an examinee{\textquoteright}s proficiency to a desired level of precision. In the ideal situation, a CAT will result in examinees answering different number of items according to the stopping rule employed. Unfortunately, the realities of testing conditions have necessitated the imposition of time and minimum test length limits on CATs. Such constraints might place a burden on the CAT test taker resulting in aberrant response behaviors by some examinees. Occurrence of such response patterns results in inaccurate estimation of examinee proficiency levels. This study examined the effects of test lengths, time limits and the interaction of these factors with the examinee proficiency levels on the occurrence of aberrant response patterns. The focus of the study was on the aberrant behaviors caused by rushed guessing due to restrictive time limits. Four different testing scenarios were examined; fixed length performance tests with and without content constraints, fixed length mastery tests and variable length mastery tests without content constraints. 
For each of these testing scenarios, the effects of two test lengths, five different timing conditions, and the interaction of these factors with three ability levels on ability estimation were examined. For fixed- and variable-length mastery tests, decision accuracy was also examined in addition to estimation accuracy. Several indices were used to evaluate the estimation and decision accuracy for different testing conditions. The results showed that changing time limits had a significant impact on the occurrence of aberrant response patterns conditional on ability. Increasing test length had a negligible, if not negative, effect on ability estimation when rushed guessing occurred. In performance testing, high-ability examinees suffered the most, whereas in classification testing, middle-ability examinees did. The decision accuracy was considerably affected in the case of variable-length classification tests. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Rizavi, S. M.} } @article {370, title = {An EM approach to parameter estimation for the Zinnes and Griggs paired comparison IRT model}, journal = {Applied Psychological Measurement}, volume = {26}, number = {2}, year = {2002}, pages = {208-227}, abstract = {Borman et al. recently proposed a computer adaptive performance appraisal system called CARS II that utilizes paired comparison judgments of behavioral stimuli. To implement this approach, the paired comparison ideal point model developed by Zinnes and Griggs was selected. In this article, the authors describe item response and information functions for the Zinnes and Griggs model and present procedures for estimating stimulus and person parameters. Monte Carlo simulations were conducted to assess the accuracy of the parameter estimation procedures. The results indicated that at least 400 ratees (i.e., ratings) are required to obtain reasonably accurate estimates of the stimulus parameters and their standard errors. In addition, latent trait estimation improves as test length increases. The implications of these results for test construction are also discussed. }, keywords = {Adaptive Testing, Computer Assisted Testing, Item Response Theory, Maximum Likelihood, Personnel Evaluation, Statistical Correlation, Statistical Estimation}, author = {Stark, S. and F Drasgow} } @conference {972, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 134 KB}}, address = {New Orleans LA}, author = {Kingsbury, G. G.} } @conference {223, title = {An empirical comparison of achievement level estimates from adaptive tests and paper-and-pencil tests}, booktitle = {annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans, LA. USA}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G.} } @booklet {1588, title = {An empirical investigation of selected multi-stage testing design variables on test assembly and decision accuracy outcomes for credentialing exams (Center for Educational Assessment Research Report No 469)}, year = {2002}, address = {Amherst, MA: University of Massachusetts, School of Education.}, author = {Zenisky, A.
L.} } @conference {1203, title = {Employing new ideas in CAT to a simulated reading test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 216 KB}}, address = {New Orleans LA}, author = {Thompson, T.} } @article {672, title = {{\'E}tude de la distribution d{\textquoteright}{\'e}chantillonnage de l{\textquoteright}estimateur du niveau d{\textquoteright}habilet{\'e} en testing adaptatif en fonction de deux r{\`e}gles d{\textquoteright}arr{\^e}t dans le contexte de l{\textquoteright}application du mod{\`e}le de Rasch [Study of the sampling distribution of the proficiency estimator in adaptive testing as a function of two stopping rules in the context of the application of the Rasch model]}, journal = {Mesure et {\'e}valuation en {\'e}ducation}, volume = {24(2-3)}, year = {2002}, note = {(In French)}, pages = {23-40}, author = {Ra{\^\i}che, G. and Blais, J-G.} } @article {412, title = {Evaluation of selection procedures for computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, number = {4}, year = {2002}, note = {References. Sage Publications, US}, pages = {393-411}, abstract = {In the present study, a procedure that has been used to select dichotomous items in computerized adaptive testing was applied to polytomous items. This procedure was designed to select the item with maximum weighted information. In a simulation study, the item information function was integrated over a fixed interval of ability values and the item with the maximum area was selected. This maximum interval information item selection procedure was compared to a maximum point information item selection procedure. Substantial differences between the two item selection procedures were not found when computerized adaptive tests were evaluated on bias and the root mean square of the ability estimate. }, keywords = {computerized adaptive testing}, author = {van Rijn, P. W. and Theo Eggen and Hemker, B. T. and Sanders, P. F.} } @article {681, title = {Evaluation of selection procedures for computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {26}, year = {2002}, pages = {393-411}, author = {van Rijn, P. W. and Theo Eggen and Hemker, B. T. and Sanders, P. F.} } @conference {1134, title = {An examination of decision-theory adaptive testing procedures}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 46 KB}}, address = {New Orleans, LA}, author = {Rudner, L. M.} } @booklet {1624, title = {An exploration of potentially problematic adaptive tests}, year = {2002}, note = {Princeton NJ: Educational Testing Service.}, address = {(Research Report 02-05)}, author = {Stocking, M. and Steffen, M. and Golub-Smith, M. L. and Eignor, D. R.} } @conference {857, title = {Fairness issues in adaptive tests with strict time limits}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 1.287 MB}}, address = {New Orleans LA}, author = {Bridgeman, B. and Cline, F.} } @article {523, title = {Feasibility and acceptability of computerized adaptive testing (CAT) for fatigue monitoring in clinical practice}, journal = {Quality of Life Research}, volume = {11(7)}, year = {2002}, pages = {134}, author = {Davis, K. M. and Chang, C-H. and Lai, J-S.
and Cella, D.} } @booklet {1323, title = {A feasibility study of on-the-fly item generation in adaptive testing (GRE Board Report No 98-12)}, year = {2002}, address = {Educational Testing Service RR02-23. Princeton NJ: Educational Testing Service. Note = {\textquotedblleft}{PDF file, 193 KB}}, author = {Bejar, I. I. and Lawless, R. R and Morley, M. E and Wagner, M. E. and Bennett, R. E. and Revuelta, J.} } @conference {920, title = {A further study on adjusting CAT item selection starting point for individual examinees}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {$\#$FA02-01}, address = {New Orleans LA}, author = {Fan, M. and Zhu.} } @inbook {119, title = {Generating abstract reasoning items with cognitive theory}, booktitle = {Item generation for test development}, year = {2002}, note = {Using Smart Source ParsingItem generation for test development. (pp. 219-250). Mahwah, NJ : Lawrence Erlbaum Associates, Publishers. xxxii, 412 pp}, pages = {219-250}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Developed and evaluated a generative system for abstract reasoning items based on cognitive theory. The cognitive design system approach was applied to generate matrix completion problems. Study 1 involved developing the cognitive theory with 191 college students who were administered Set I and Set II of the Advanced Progressive Matrices. Study 2 examined item generation by cognitive theory. Study 3 explored the psychometric properties and construct representation of abstract reasoning test items with 728 young adults. Five structurally equivalent forms of Abstract Reasoning Test (ART) items were prepared from the generated item bank and administered to the Ss. In Study 4, the nomothetic span of construct validity of the generated items was examined with 728 young adults who were administered ART items, and 217 young adults who were administered ART items and the Advanced Progressive Matrices. Results indicate the matrix completion items were effectively generated by the cognitive design system approach. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Cognitive Processes, Measurement, Reasoning, Test Construction, Test Items, Test Validity, Theories}, author = {Embretson, S. E.}, editor = {P. Kyllomen} } @conference {853, title = {Historique et concepts propres au testing adaptatif [Adaptive testing: Historical accounts and concepts]}, booktitle = {Presented at the 69th Congress of the Acfas. Sherbrooke: Association canadienne fran{\c c}aise pour l{\textquoteright}avancement des sciences (Acfas). [In French]}, year = {2002}, author = {Blais, J. G.} } @article {60, title = {Hypergeometric family and item overlap rates in computerized adaptive testing}, journal = {Psychometrika}, volume = {67}, number = {3}, year = {2002}, pages = {387-398}, abstract = {A computerized adaptive test (CAT) is usually administered to small groups of examinees at frequent time intervals. It is often the case that examinees who take the test earlier share information with examinees who will take the test later, thus increasing the risk that many items may become known. Item overlap rate for a group of examinees refers to the number of overlapping items encountered by these examinees divided by the test length. For a specific item pool, different item selection algorithms may yield different item overlap rates. 
An important issue in designing a good CAT item selection algorithm is to keep item overlap rate below a preset level. In doing so, it is important to investigate what the lowest rate could be for all possible item selection algorithms. In this paper we rigorously prove that if every item had an equal possibility to be selected from the pool in a fixed-length CAT, the number of overlapping item among any α randomly sampled examinees follows the hypergeometric distribution family for α >= 1. Thus, the expected values of the number of overlapping items among any randomly sampled α examinee can be calculated precisely. These values may serve as benchmarks in controlling item overlap rates for fixed-length adaptive tests. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Algorithms, Computer Assisted Testing, Taking, Test, Time On Task computerized adaptive testing}, author = {Chang, Hua-Hua and Zhang, J.} } @conference {875, title = {Identify the lower bounds for item sharing and item pooling in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans LA}, author = {Chang, Hua-Hua and Zhang, J.} } @conference {939, title = {Impact of item quality and item bank size on the psychometric quality of computer-based credentialing exams}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, address = {New Orleans LA}, author = {Hambleton, R. K.} } @conference {940, title = {Impact of selected factors on the psychometric quality of credentialing examinations administered with a sequential testlet design}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, address = {New Orleans LA}, author = {Hambleton, R. K. and Jodoin, M. and Zenisky, A. L.} } @conference {1274, title = {Impact of test design, item quality and item bank size on the psychometric properties of computer-based credentialing exams}, booktitle = {Paper presented at the meeting of National Council on Measurement in Education}, year = {2002}, note = {PDF file, 500 K}, address = {New Orleans}, author = {Xing, D. and Hambleton, R. K.} } @article {161, title = {The implications of the use of non-optimal items in a Computer Adaptive Testing (CAT) environment}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {63}, number = {3-B}, year = {2002}, pages = {1606}, abstract = {This study describes the effects of manipulating item difficulty in a computer adaptive testing (CAT) environment. There are many potential benefits when using CATS as compared to traditional tests. These include increased security, shorter tests, and more precise measurement. According to IRT, the theory underlying CAT, as the computer continually recalculates ability, items that match that current estimate of ability are administered. Such items provide maximum information about examinees during the test. Herein, however, lies a potential problem. These optimal CAT items result in an examinee having only a 50\% chance of a correct response. Some examinees may consider such items unduly challenging. Further, when test anxiety is a factor, it is possible that test scores may be negatively affected. This research was undertaken to determine the effects of administering easier CAT items on ability estimation and test length using computer simulations. 
Also considered was the administration of different numbers of initial items prior to the start of the adaptive portion of the test, using three different levels of measurement precision. Results indicate that regardless of the number of initial items administered, the level of precision employed, or the modifications made to item difficulty, the approximation of estimated ability to true ability is good in all cases. Additionally, the standard deviations of the ability estimates closely approximate the theoretical levels of precision used as stopping rules for the simulated CATs. Since optimal CAT items are not used, each item administered provides less information about examinees than optimal CAT items. This results in longer tests. Fortunately, using easier items that provide up to a 66.4\% chance of a correct response results in tests that only modestly increase in length, across levels of precision. For larger standard errors, even easier items (up to a 73.5\% chance of a correct response) result in only negligible to modest increases in test length. Examinees who find optimal CAT items difficult or examinees with test anxiety may find CATs that implement easier items enhance the already existing benefits of CAT. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Grodenchik, D. J.} } @conference {1277, title = {Incorporating the Sympson-Hetter exposure control method into the a-stratified method with content blocking}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {PDF file, 387 K}, address = {New Orleans, LA}, author = {Yi, Q.} } @article {12, title = {Information technology and literacy assessment}, journal = {Reading and Writing Quarterly}, volume = {18}, number = {4}, year = {2002}, pages = {369-373}, abstract = {This column discusses information technology and literacy assessment in the past and present. The author also describes computer-based assessments today including the following topics: computer-scored testing, computer-administered formal assessment, Internet formal assessment, computerized adaptive tests, placement tests, informal assessment, electronic portfolios, information management, and Internet information dissemination. A model of the major present-day applications of information technologies in reading and literacy assessment is also included. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Applications, Computer Assisted Testing, Information, Internet, Literacy, Models, Systems, Technology}, author = {Balajthy, E.} } @inbook {1866, title = {Innovative item types for computerized testing}, year = {2002}, address = {In W. J. van der Linden and C. A. W. Glas (Eds.), Computerized adaptive testing: Theory and practice. Norwell MA: Kluwer (in press).}, author = {Parshall, C. G. and Davey, T. and Pashley, P.} } @conference {1160, title = {An investigation of procedures for estimating error indexes in proficiency estimation in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, address = {New Orleans LA}, author = {Shyu, C.-Y. and Fan, M. 
and Thompson, T, and Hsu, Y.} } @article {354, title = {An item response model for characterizing test compromise}, journal = {Journal of Educational and Behavioral Statistics}, volume = {27}, number = {2}, year = {2002}, note = {References .American Educational Research Assn, US}, pages = {163-179}, abstract = {This article presents an item response model for characterizing test-compromise that enables the estimation of item-preview and score-gain distributions observed in on-demand high-stakes testing programs. Model parameters and posterior distributions are estimated by Markov Chain Monte Carlo (MCMC) procedures. Results of a simulation study suggest that when at least some of the items taken by a small sample of test takers are known to be secure (uncompromised), the procedure can provide useful summaries of test-compromise and its impact on test scores. The article includes discussions of operational use of the proposed procedure, possible model violations and extensions, and application to computerized adaptive testing. }, keywords = {computerized adaptive testing}, author = {Segall, D. O.} } @article {612, title = {Item selection in computerized adaptive testing: Improving the a-stratified design with the Sympson-Hetter algorithm}, journal = {Applied Psychological Measurement}, volume = {26}, year = {2002}, pages = {376-392}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @article {248, title = {Item selection in computerized adaptive testing: Improving the a-stratified design with the Sympson-Hetter algorithm}, journal = {Applied Psychological Measurement}, volume = {26}, number = {4}, year = {2002}, pages = {376-392}, isbn = {0146-6216}, author = {Leung, C. K. and Chang, Hua-Hua and Hau, K. T.} } @article {671, title = {La simulation d{\textquoteright}un test adaptatif bas{\'e} sur le mod{\`e}le de Rasch [Simulation of a Rasch-based adaptive test]}, journal = {Mesure et {\'e}valuation en {\'e}ducation.}, year = {2002}, note = {(In French) {PDF file, 30 KB}}, author = {Ra{\^\i}che, G.} } @inbook {1871, title = {Le testing adaptatif [Adaptive testing]}, year = {2002}, note = {(In French) {PDF file, 191 KB}}, address = {D. R. Bertrand and J.G. Blais (Eds) : Les th{\'e}ories modernes de la mesure [Modern theories of measurement]. Sainte-Foy: Presses de l{\textquoteright}Universit{\'e} du Qu{\'e}bec.}, author = {Ra{\^\i}che, G.} } @conference {1057, title = {Mapping the Development of Pre-reading Skills with STAR Early Literacy}, booktitle = {Presentation to the Annual Meeting of the Society for the Scientific Study of Reading. Chicago.}, year = {2002}, author = {J. R. McBride and Tardrew, S.P.} } @article {418, title = {Mathematical-programming approaches to test item pool design}, number = {RR 02-09}, year = {2002}, note = {Using Smart Source ParsingAdvances in psychology research, Vol. ( Hauppauge, NY : Nova Science Publishers, Inc, [URL:http://www.Novapublishers.com]. vi, 228 pp}, pages = {93-108}, institution = {University of Twente, Faculty of Educational Science and Technology}, address = {Twente, The Netherlands}, abstract = {(From the chapter) This paper presents an approach to item pool design that has the potential to improve on the quality of current item pools in educational and psychological testing and hence to increase both measurement precision and validity. The approach consists of the application of mathematical programming techniques to calculate optimal blueprints for item pools. These blueprints can be used to guide the item-writing process. 
Three different types of design problems are discussed, namely for item pools for linear tests, item pools computerized adaptive testing (CAT), and systems of rotating item pools for CAT. The paper concludes with an empirical example of the problem of designing a system of rotating item pools for CAT.}, keywords = {Adaptive Testing, Computer Assisted, Computer Programming, Educational Measurement, Item Response Theory, Mathematics, Psychometrics, Statistical Rotation computerized adaptive testing, Test Items, Testing}, isbn = {02-09}, author = {Veldkamp, B. P. and van der Linden, W. J. and Ariel, A.} } @article {50, title = {Measuring quality of life in chronic illness: the functional assessment of chronic illness therapy measurement system}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {83}, number = {12 Suppl 2}, year = {2002}, note = {0003-9993Journal Article}, month = {Dec}, pages = {S10-7}, abstract = {We focus on quality of life (QOL) measurement as applied to chronic illness. There are 2 major types of health-related quality of life (HRQOL) instruments-generic health status and targeted. Generic instruments offer the opportunity to compare results across patient and population cohorts, and some can provide normative or benchmark data from which to interpret results. Targeted instruments ask questions that focus more on the specific condition or treatment under study and, as a result, tend to be more responsive to clinically important changes than generic instruments. Each type of instrument has a place in the assessment of HRQOL in chronic illness, and consideration of the relative advantages and disadvantages of the 2 options best drives choice of instrument. The Functional Assessment of Chronic Illness Therapy (FACIT) system of HRQOL measurement is a hybrid of the 2 approaches. The FACIT system combines a core general measure with supplemental measures targeted toward specific diseases, conditions, or treatments. Thus, it capitalizes on the strengths of each type of measure. Recently, FACIT questionnaires were administered to a representative sample of the general population with results used to derive FACIT norms. These normative data can be used for benchmarking and to better understand changes in HRQOL that are often seen in clinical trials. Future directions in HRQOL assessment include test equating, item banking, and computerized adaptive testing.}, keywords = {*Chronic Disease, *Quality of Life, *Rehabilitation, Adult, Comparative Study, Health Status Indicators, Humans, Psychometrics, Questionnaires, Research Support, U.S. Gov{\textquoteright}t, P.H.S., Sensitivity and Specificity}, author = {Cella, D. and Nowinski, C. J.} } @booklet {1439, title = {MIRTCAT [computer software]}, year = {2002}, address = {Upper Marlboro MD: Author}, author = {Li, Y. H.} } @booklet {1533, title = {Modifications of the Sympson-Hetter method for item-exposure control in computerized adaptive testing}, year = {2002}, address = {Manuscript submitted for publication}, author = {van der Linden, W. J.} } @article {146, title = {Multidimensional adaptive testing for mental health problems in primary care}, journal = {Medical Care}, volume = {40}, number = {9}, year = {2002}, note = {Gardner, WilliamKelleher, Kelly JPajer, Kathleen AMCJ-177022/PHS HHS/MH30915/MH/NIMH NIH HHS/MH50629/MH/NIMH NIH HHS/Med Care. 
2002 Sep;40(9):812-23.}, month = {Sep}, pages = {812-23}, edition = {2002/09/10}, abstract = {OBJECTIVES: Efficient and accurate instruments for assessing child psychopathology are increasingly important in clinical practice and research. For example, screening in primary care settings can identify children and adolescents with disorders that may otherwise go undetected. However, primary care offices are notorious for the brevity of visits and screening must not burden patients or staff with long questionnaires. One solution is to shorten assessment instruments, but dropping questions typically makes an instrument less accurate. An alternative is adaptive testing, in which a computer selects the items to be asked of a patient based on the patient{\textquoteright}s previous responses. This research used a simulation to test a child mental health screen based on this technology. RESEARCH DESIGN: Using half of a large sample of data, a computerized version was developed of the Pediatric Symptom Checklist (PSC), a parental-report psychosocial problem screen. With the unused data, a simulation was conducted to determine whether the Adaptive PSC can reproduce the results of the full PSC with greater efficiency. SUBJECTS: PSCs were completed by parents on 21,150 children seen in a national sample of primary care practices. RESULTS: Four latent psychosocial problem dimensions were identified through factor analysis: internalizing problems, externalizing problems, attention problems, and school problems. A simulated adaptive test measuring these traits asked an average of 11.6 questions per patient, and asked five or fewer questions for 49\% of the sample. There was high agreement between the adaptive test and the full (35-item) PSC: only 1.3\% of screening decisions were discordant (kappa = 0.93). This agreement was higher than that obtained using a comparable length (12-item) short-form PSC (3.2\% of decisions discordant; kappa = 0.84). CONCLUSIONS: Multidimensional adaptive testing may be an accurate and efficient technology for screening for mental health problems in primary care settings.}, keywords = {Adolescent, Child, Child Behavior Disorders/*diagnosis, Child Health Services/*organization \& administration, Factor Analysis, Statistical, Female, Humans, Linear Models, Male, Mass Screening/*methods, Parents, Primary Health Care/*organization \& administration}, isbn = {0025-7079 (Print); 0025-7079 (Linking)}, author = {Gardner, W. and Kelleher, K. J. and Pajer, K. A.} } @article {417, title = {Multidimensional adaptive testing with constraints on test content}, journal = {Psychometrika}, volume = {67}, number = {4}, year = {2002}, note = {Psychometric Society, US}, pages = {575-588}, abstract = {The case of adaptive testing under a multidimensional response model with large numbers of constraints on the content of the test is addressed. The items in the test are selected using a shadow test approach. The 0{\textendash}1 linear programming model that assembles the shadow tests maximizes posterior expected Kullback-Leibler information in the test. The procedure is illustrated for five different cases of multidimensionality. These cases differ in (a) the numbers of ability dimensions that are intentional or should be considered as {\textquotedblleft}nuisance dimensions{\textquotedblright} and (b) whether the test should or should not display a simple structure with respect to the intentional ability dimensions.}, author = {Veldkamp, B. P. and van der Linden, W. 
J.} } @conference {1259, title = {Optimum number of strata in the a-stratified adaptive testing design}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 114 KB}}, address = {New Orleans LA}, author = {Wen, J.-B. and Chang, Hua-Hua and Hau, K-T.} } @article {277, title = {Outlier detection in high-stakes certification testing}, journal = {Journal of Educational Measurement}, volume = {39}, number = {3}, year = {2002}, pages = {219-233}, abstract = {Discusses recent developments of person-fit analysis in computerized adaptive testing (CAT). Methods from statistical process control are presented that have been proposed to classify an item score pattern as fitting or misfitting the underlying item response theory model in CAT. Most person-fit research in CAT is restricted to simulated data. In this study, empirical data from a certification test were used. Alternatives are discussed to generate norms so that bounds can be determined to classify an item score pattern as fitting or misfitting. Using bounds determined from a sample of a high-stakes certification test, the empirical analysis showed that different types of misfit can be distinguished. Further applications using statistical process control methods to detect misfitting item score patterns are discussed. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, computerized adaptive testing, Educational Measurement, Goodness of Fit, Item Analysis (Statistical), Item Response Theory, Person Fit, Statistical Estimation, Statistical Power, Test Scores}, author = {Meijer, R. R.} } @conference {1100, title = {Practical considerations about expected a posteriori estimation in adaptive testing: Adaptive a priori, adaptive corrections for bias, adaptive integration interval}, booktitle = {Paper presented at the annual meeting of the International Objective Measurement Workshops-XI}, year = {2002}, note = {{PDF file, 100 KB}}, address = {New Orleans, LA}, author = {Raiche, G. and Blais, J. G.} } @conference {1079, title = {A {\textquotedblleft}rearrangement procedure{\textquotedblright} for administering adaptive tests with review options}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 410 KB}}, address = {New Orleans LA}, author = {Papanastasiou, E. C.} } @conference {878, title = {Redeveloping the exposure control parameters of CAT items when a pool is modified}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 1.113 MB}}, address = {New Orleans LA}, author = {Chang, S-W. and Harris, D. J.} } @conference {1238, title = {Relative precision of ability estimation in polytomous CAT: A comparison under the generalized partial credit model and graded response model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {$\#$WA02-01 {PDF file, 735 KB}}, address = {New Orleans LA}, author = {Wang, S. and Wang, T.} } @conference {958, title = {Reliability and decision accuracy of linear parallel form and multistage tests with realistic and ideal item pools}, booktitle = {Paper presented at the International Conference on Computer-Based Testing and the Internet}, year = {2002}, address = {Winchester, England}, author = {Jodoin, M. 
G.} } @conference {1292, title = {The robustness of the unidimensional 3PL IRT model when applied to two-dimensional data in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 1.356 MB}}, address = {New Orleans LA}, author = {Zhao, J. C. and McMorris, R. F. and Pruzek, R. M. and Chen, R.} } @article {811, title = {Self-adapted testing: An overview}, journal = {International Journal of Continuing Engineering Education and Life-Long Learning}, volume = {12}, year = {2002}, pages = {107-122}, author = {Wise, S. L. and Ponsoda, V. and Olea, J.} } @conference {1101, title = {Some features of the estimated sampling distribution of the ability estimate in computerized adaptive testing according to two stopping rules}, booktitle = {Communication propos{\'e}e au 11e Biannual International objective measurement workshop. New-Orleans : International Objective Measurement Workshops.}, year = {2002}, author = {Ra{\^\i}che, G. and Blais, J. G.} } @conference {861, title = {Some features of the sampling distribution of the ability estimate in computerized adaptive testing according to two stopping rules}, booktitle = {Paper presented at the annual meeting of the International Objective Measurement Workshops-XI}, year = {2002}, note = {{PDF file, 38 KB}}, address = {New Orleans, LA}, author = {Blais, J-G. and Raiche, G.} } @booklet {1508, title = {STAR Math 2 Computer-Adaptive Math Test and Database: Technical Manual}, year = {2002}, address = {Wisconsin Rapids, WI: Author}, author = {Renaissance-Learning-Inc.} } @conference {1294, title = {Statistical indexes for monitoring item behavior under computer adaptive testing environment}, booktitle = {(Original title: Detecting item misfit in computerized adaptive testing.) Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {{PDF file, 2.287 MB}}, address = {New Orleans LA}, author = {Zhu, R. and Yu, F. and Liu, S. M.} } @book {1661, title = {Strategies for controlling item exposure in computerized adaptive testing with polytomously scored items}, year = {2002}, note = {{PDF file, 1.83 MB}}, address = {Unpublished doctoral dissertation, University of Texas, Austin}, author = {Davis, L. L.} } @booklet {1436, title = {A strategy for controlling item exposure in multidimensional computerized adaptive testing}, year = {2002}, address = {Available from http://www3. tat.sinica.edu.tw/library/c_tec_rep/c-2002-11.pdf}, author = {Lee, Y. H. and Ip, E.H. and Fuh, C.D.} } @article {346, title = {A structure-based approach to psychological measurement: Matching measurement models to latent structure}, journal = {Assessment}, volume = {9}, number = {1}, year = {2002}, pages = {4-16}, abstract = {The present article sets forth the argument that psychological assessment should be based on a construct{\textquoteright}s latent structure. The authors differentiate dimensional (continuous) and taxonic (categorical) structures at the latent and manifest levels and describe the advantages of matching the assessment approach to the latent structure of a construct. A proper match will decrease measurement error, increase statistical power, clarify statistical relationships, and facilitate the location of an efficient cutting score when applicable. Thus, individuals will be placed along a continuum or assigned to classes more accurately. 
The authors briefly review the methods by which latent structure can be determined and outline a structure-based approach to assessment that builds on dimensional scaling models, such as item response theory, while incorporating classification methods as appropriate. Finally, the authors empirically demonstrate the utility of their approach and discuss its compatibility with traditional assessment methods and with computerized adaptive testing. (PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Assessment, Classification (Cognitive Process), Computer Assisted, Item Response Theory, Psychological, Scaling (Testing), Statistical Analysis computerized adaptive testing, Taxonomies, Testing}, author = {Ruscio, John and Ruscio, Ayelet Meron} } @article {300, title = {Technology solutions for testing}, journal = {School Administrator}, volume = {4}, number = {59}, year = {2002}, pages = {20-23}, abstract = {Northwest Evaluation Association in Portland, Oregon, consults with state and local educators on assessment issues. Describes several approaches in place at school districts that are using some combination of computer-based tests to measure student growth. The computerized adaptive test adjusts items based on a student{\textquoteright}s answer in "real time." On-demand testing provides almost instant scoring. (MLF)}, author = {Olson, A.} } @inbook {1834, title = {Test models for complex computer-based testing}, year = {2002}, address = {C. N. Mills, M. T. Potenza, J. J. Fremer, and W. C. Ward (Eds.). Computer-based testing: Building the foundation for future assessments (pp. 67-88). Hillsdale NJ: Erlbaum.}, author = {Luecht, R. M. and Clauser, B. E.} } @conference {1030, title = {A testlet assembly design for the uniform CPA examination}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education.}, year = {2002}, note = {PDF file 192 KB}, address = {New Orleans}, author = {Luecht, R. M. and Brumfield, T. and Breithaupt, K.} } @conference {873, title = {To weight or not to weight {\textendash} balancing influence of initial and later items in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 252 KB}}, address = {New Orleans LA}, author = {Chang, Hua-Hua and Ying, Z.} } @conference {1171, title = {Updated item parameter estimates using sparse CAT data}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2002}, note = {{PDF file, 986 KB}}, address = {New Orleans LA}, author = {Smith, R. L. and Rizavi, S. and Paez, R. and Rotou, O.} } @conference {1224, title = {Using judgments of item difficulty to change answers on computerized adaptive vocabulary tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2002}, note = {$\#$VI02-01}, address = {New Orleans LA}, author = {Vispoel, W. P. and Clough, S. J. and Bleiler, T.} } @conference {1261, title = {Using testlet response theory to evaluate the equivalence of automatically generated multiple-choice items}, booktitle = {Symposium conducted at the annual meeting of the National Council on Measurement in Education}, year = {2002}, address = {New Orleans LA}, author = {Williamson, D. M. and Bejar, I. 
I.} } @conference {900, title = {Utility of Learning Potential Computerised Adaptive Test (LPCAT) scores in predicting academic performance of bridging students: A comparison with other predictors}, booktitle = {Paper presented at the 5th Annual Society for Industrial and Organisational Psychology Congress}, year = {2002}, address = {Pretoria, South Africa}, author = {De Beer, M.} } @inbook {108, title = {The work ahead: A psychometric infrastructure for computerized adaptive tests}, booktitle = {Computer-based testing: Building the foundation for future assessments}, year = {2002}, note = {Computer-based testing: Building the foundation for future assessments (pp. 1-35). Mahwah, NJ: Lawrence Erlbaum Associates, Publishers. xi, 326 pp}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Considers the past and future of computerized adaptive tests and computer-based tests and looks at issues and challenges confronting a testing program as it implements and operates a computer-based test. Recommendations for testing programs from The National Council of Measurement in Education Ad Hoc Committee on Computerized Adaptive Test Disclosure are appended. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Educational, Measurement, Psychometrics}, author = {Drasgow, F.}, editor = {M. T. Potenza and J. J. Fremer and W. C. Ward} } @conference {1258, title = {Adaptation of a-stratified method in variable length computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 384 KB}}, address = {Seattle WA}, author = {Wen, J.-B. and Chang, Hua-Hua and Hau, K.-T.} } @booklet {1476, title = {Application of data mining to response data in a computerized adaptive test}, year = {2001}, address = {Paper presented at the Annual Meeting of the National Council on Measurement in Education, Seattle WA}, author = {Mendez, F. A.} } @conference {987, title = {Application of score information for CAT pool development and its connection with "likelihood test information"}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {$\#${PDF file, 392 KB}}, address = {Seattle WA}, author = {Krass, I. A.} } @article {89, title = {Assessment in the twenty-first century: A role of computerised adaptive testing in national curriculum subjects}, journal = {Teacher Development}, volume = {5}, number = {2}, year = {2001}, pages = {241-57}, abstract = {With the investment of large sums of money in new technologies for schools and education authorities and the subsequent training of teachers to integrate Information and Communications Technology (ICT) into their teaching strategies, it is remarkable that the old outdated models of assessment still remain. This article highlights the current problems associated with pen-and-paper testing and offers suggestions for an innovative and new approach to assessment for the twenty-first century. Based on the principle of the {\textquoteleft}wise examiner{\textquoteright}, a computerised adaptive testing system which measures pupils{\textquoteright} ability against the levels of the United Kingdom National Curriculum has been developed for use in mathematics. 
Using constructed response items, pupils are administered a test tailored to their ability with a reliability index of 0.99. Since the software administers maximally informative questions matched to each pupil{\textquoteright}s current ability estimate, no two pupils will receive the same set of items in the same order, therefore removing opportunities for plagiarism and teaching to the test. All marking is automated and a journal recording the outcome of the test and highlighting the areas of difficulty for each pupil is available for printing by the teacher. The current prototype of the system can be used on a school{\textquoteright}s network; however, the authors envisage a day when Examination Boards or the Qualifications and Assessment Authority (QCA) will administer Government tests from a central server to all United Kingdom schools or testing centres. Results will be issued at the time of testing and opportunities for resits will become more widespread.}, keywords = {computerized adaptive testing}, author = {Cowan, P. and Morrison, H.} } @conference {1278, title = {a-stratified CAT design with content-blocking}, booktitle = {Paper presented at the Annual Meeting of the Psychometric Society}, year = {2001}, note = {{PDF file, 410 KB}}, address = {King of Prussia, PA}, author = {Yi, Q. and Chang, Hua-Hua} } @conference {905, title = {a-stratified computerized adaptive testing with unequal item exposure across strata}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {$\#$DE01-01}, address = {Seattle WA}, author = {Deng, H. and Chang, Hua-Hua} } @article {56, title = {a-Stratified multistage computerized adaptive testing with b blocking}, journal = {Applied Psychological Measurement}, volume = {25}, number = {4}, year = {2001}, pages = {333-341}, abstract = {Chang \& Ying{\textquoteright}s (1999) computerized adaptive testing item-selection procedure stratifies the item bank according to a parameter values and requires b parameter values to be evenly distributed across all strata. Thus, a and b parameter values must be incorporated into how strata are formed. A refinement is proposed, based on Weiss{\textquoteright} (1973) stratification of items according to b values. Simulation studies using a retired item bank of a Graduate Record Examination test indicate that the new approach improved control of item exposure rates and reduced mean squared errors. }, isbn = {0146-6216}, author = {Chang, Hua-Hua and Qian, J. and Ying, Z.} } @article {55, title = {a-stratified multistage computerized adaptive testing with b blocking}, journal = {Applied Psychological Measurement}, volume = {25}, number = {4}, year = {2001}, pages = {333-41}, abstract = {Proposed a refinement, based on the stratification of items developed by D. Weiss (1973), of the computerized adaptive testing item selection procedure of H. Chang and Z. Ying (1999). Simulation studies using an item bank from the Graduate Record Examination show the benefits of the new procedure. (SLD)}, keywords = {computerized adaptive testing}, author = {Chang, Hua-Hua and Qian, J. and Ying, Z.} } @conference {1223, title = {Can examinees use judgments of item difficulty to improve proficiency estimates on computerized adaptive vocabulary tests?}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {$\#$VI01-01}, address = {Seattle WA}, author = {Vispoel, W. P. and Clough, S. J. and Bleiler, T. and 
Hendrickson, A. B. and Ihrig, D.} } @article {2088, title = {CATSIB: A modified SIBTEST procedure to detect differential item functioning in computerized adaptive tests (Research report)}, year = {2001}, institution = {Law School Admission Council}, address = {Newton, PA}, author = {Nandakumar, R. and Roussos, L.} } @booklet {1383, title = {CB BULATS: Examining the reliability of a computer based test using test-retest method}, year = {2001}, note = {$\#$GE01-01 14-16. {PDF file, 456 KB}}, address = {Cambridge ESOL Research Notes, Issue 5, July 2001, pp}, author = {Geranpayeh, A.} } @article {13, title = {A comparative study of on-line pretest item calibration/scaling methods in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {191-212}, abstract = {The purpose of this study was to compare and evaluate five on-line pretest item-calibration/scaling methods in computerized adaptive testing (CAT): marginal maximum likelihood estimate with one EM cycle (OEM), marginal maximum likelihood estimate with multiple EM cycles (MEM), Stocking{\textquoteright}s Method A, Stocking{\textquoteright}s Method B, and BILOG/Prior. The five methods were evaluated in terms of item-parameter recovery, using three different sample sizes (300, 1000 and 3000). The MEM method appeared to be the best choice among these, because it produced the smallest parameter-estimation errors for all sample size conditions. MEM and OEM are mathematically similar, although the OEM method produced larger errors. MEM also was preferable to OEM, unless the amount of time involved in iterative computation is a concern. Stocking{\textquoteright}s Method B also worked very well, but it required anchor items that either would increase test lengths or require larger sample sizes depending on test administration design. Until more appropriate ways of handling sparse data are devised, the BILOG/Prior method may not be a reasonable choice for small sample sizes. Stocking{\textquoteright}s Method A had the largest weighted total error, as well as a theoretical weakness (i.e., treating estimated ability as true ability); thus, there appeared to be little reason to use it.}, author = {Ban, J. C. and Hanson, B. A. and Wang, T. and Yi, Q. and Harris, D. J.} } @conference {1279, title = {Comparison of the SPRT and CMT procedures in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, address = {Seattle WA}, author = {Yi, Q. and Hanson, B. and Widiatmo, H. and Harris, D. J.} } @article {396, title = {Computerized adaptive testing with equated number-correct scoring}, journal = {Applied Psychological Measurement}, volume = {25}, number = {4}, year = {2001}, note = {Sage Publications, US}, pages = {343-355}, abstract = {A constrained computerized adaptive testing (CAT) algorithm is presented that can be used to equate CAT number-correct (NC) scores to a reference test. As a result, the CAT NC scores also are equated across administrations. The constraints are derived from van der Linden \& Luecht{\textquoteright}s (1998) set of conditions on item response functions that guarantees identical observed NC score distributions on two test forms. 
An item bank from the Law School Admission Test was used to compare the results of the algorithm with those for equipercentile observed-score equating, as well as the prediction of NC scores on a reference test using its test response function. The effects of the constraints on the statistical properties of the θ estimator in CAT were examined. }, author = {van der Linden, W. J.} } @article {336, title = {Computerized adaptive testing with the generalized graded unfolding model}, journal = {Applied Psychological Measurement}, volume = {25}, number = {2}, year = {2001}, pages = {177-196}, abstract = {Examined the use of the generalized graded unfolding model (GGUM) in computerized adaptive testing. The objective was to minimize the number of items required to produce equiprecise estimates of person locations. Simulations based on real data about college student attitudes toward abortion and on data generated to fit the GGUM were used. It was found that as few as 7 or 8 items were needed to produce accurate and precise person estimates using an expected a posteriori procedure. The number of items in the item bank (20, 40, or 60 items) and their distribution on the continuum (uniform locations or item clusters in moderately extreme locations) had only small effects on the accuracy and precision of the estimates. These results suggest that adaptive testing with the GGUM is a good method for achieving estimates with an approximately uniform level of precision using a small number of items. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Attitude Measurement, College Students computerized adaptive testing, Computer Assisted Testing, Item Response, Models, Statistical Estimation, Theory}, author = {Roberts, J. S. and Lin, Y. and Laughlin, J. E.} } @unpublished {2121, title = {Computerized-adaptive versus paper-and-pencil testing environments: An experimental analysis of examinee experience}, volume = {Doctoral dissertation}, year = {2001}, type = {Unpublished doctoral dissertation}, author = {Bringsjord, E. L.} } @article {295, title = {Concerns with computerized adaptive oral proficiency assessment. A commentary on "Comparing examinee attitudes toward computer-assisted and other oral proficiency assessments": Response to the Norris Commentary}, journal = {Language Learning and Technology}, volume = {5}, number = {2}, year = {2001}, pages = {95-108}, abstract = {Responds to an article on computerized adaptive second language (L2) testing, expressing concerns about the appropriateness of such tests for informing language educators about the language skills of L2 learners and users and fulfilling the intended purposes and achieving the desired consequences of language test use. The authors of the original article respond. (Author/VWL)}, author = {Norris, J. M. and Kenyon, D. M. and Malabonga, V.} } @article {604, title = {CUSUM-based person-fit statistics for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, year = {2001}, pages = {199-218}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {835, title = {Data sparseness and online pretest calibration/scaling methods in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {Also ACT Research Report 2002-1}, address = {Seattle}, author = {Ban, J. and Hanson, B. A. and Yi, Q. 
and Harris, D.} } @conference {930, title = {Deriving a stopping rule for sequential adaptive tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 111 KB}}, address = {Seattle WA}, author = {Grabovsky, I. and Chang, Hua-Hua and Ying, Z.} } @booklet {1630, title = {Detection of misfitting item-score patterns in computerized adaptive testing}, year = {2001}, note = {$\#$ST01-01 V.}, address = {Enschede, The Netherlands: Febodruk B}, author = {Stoop, E. M. L. A.} } @book {1695, title = {Development and evaluation of test assembly procedures for computerized adaptive testing}, year = {2001}, address = {Unpublished doctoral dissertation, University of Massachusetts, Amherst}, author = {Robin, F.} } @article {720, title = {Development of an adaptive multimedia program to collect patient health data}, journal = {American Journal of Preventative Medicine}, volume = {21}, year = {2001}, pages = {320-324}, author = {Sutherland, L. A. and Campbell, M. and Ornstein, K. and Wildemuth, B. and Lobach, D.} } @booklet {1598, title = {The Development of STAR Early Literacy: A report of the School Renaissance Institute.}, year = {2001}, address = {Madison, WI: Author.}, author = {School-Renaissance-Institute} } @article {358, title = {Developments in measurement of persons and items by means of item response models}, journal = {Behaviormetrika}, volume = {28}, number = {1}, year = {2001}, pages = {65-94}, abstract = {This paper starts with a general introduction into measurement of hypothetical constructs typical of the social and behavioral sciences. After the stages ranging from theory through operationalization and item domain to preliminary test or questionnaire have been treated, the general assumptions of item response theory are discussed. The family of parametric item response models for dichotomous items is introduced and it is explained how parameters for respondents and items are estimated from the scores collected from a sample of respondents who took the test or questionnaire. Next, the family of nonparametric item response models is explained, followed by the 3 classes of item response models for polytomous item scores (e.g., rating scale scores). Then, to what degree the mean item score and the unweighted sum of item scores for persons are useful for measuring items and persons in the context of item response theory is discussed. Methods for fitting parametric and nonparametric models to data are briefly discussed. Finally, the main applications of item response models are discussed, which include equating and item banking, computerized and adaptive testing, research into differential item functioning, person fit research, and cognitive modeling. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Cognitive, Computer Assisted Testing, Item Response Theory, Models, Nonparametric Statistical Tests, Processes}, author = {Sijtsma, K.} } @article {315, title = {Differences between self-adapted and computerized adaptive tests: A meta-analysis}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {235-247}, abstract = {Self-adapted testing has been described as a variation of computerized adaptive testing that reduces test anxiety and thereby enhances test performance. 
The purpose of this study was to gain a better understanding of these proposed effects of self-adapted tests (SATs); meta-analysis procedures were used to estimate differences between SATs and computerized adaptive tests (CATs) in proficiency estimates and post-test anxiety levels across studies in which these two types of tests have been compared. After controlling for measurement error the results showed that SATs yielded proficiency estimates that were 0.12 standard deviation units higher and post-test anxiety levels that were 0.19 standard deviation units lower than those yielded by CATs. The authors speculate about possible reasons for these differences and discuss advantages and disadvantages of using SATs in operational settings. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Scores computerized adaptive testing, Test, Test Anxiety}, author = {Pitkin, A. K. and Vispoel, W. P.} } @conference {1121, title = {The effect of test and examinee characteristics on the occurrence of aberrant response patterns in a computerized adaptive test}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {$\#$RI01-01}, address = {Seattle WA}, author = {Rizavi, S. and Swaminathan, H.} } @conference {1139, title = {Effective use of simulated data in an on-line item calibration in practical situations of computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, address = {Seattle WA}, author = {Samejima, F.} } @conference {880, title = {Effects of changes in the examinees{\textquoteright} ability distribution on the exposure control methods in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {$\#$CH01-02 {PDF file, 695 KB}}, address = {Seattle WA}, author = {Chang, S-W. and Twu, B.-Y.} } @conference {1138, title = {Efficient on-line item calibration using a nonparametric method adjusted to computerized adaptive testing}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Samejima, F.} } @article {9, title = {Evaluation of an MMPI-A short form: Implications for adaptive testing}, journal = {Journal of Personality Assessment}, volume = {76}, number = {1}, year = {2001}, pages = {76-89}, abstract = {Reports some psychometric properties of an MMPI-Adolescent version (MMPI-A; J. N. Butcher et al, 1992) short form based on administration of the 1st 150 items of this test instrument. The authors report results for both the MMPI-A normative sample of 1,620 adolescents (aged 14-18 yrs) and a clinical sample of 565 adolescents (mean age 15.2 yrs) in a variety of treatment settings. The authors summarize results for the MMPI-A basic scales in terms of Pearson product-moment correlations generated between full administration and short-form administration formats and mean T score elevations for the basic scales generated by each approach. In this investigation, the authors also examine single-scale and 2-point congruences found for the MMPI-A basic clinical scales as derived from standard and short-form administrations. 
The authors present the relative strengths and weaknesses of the MMPI-A short form and discuss the findings in terms of implications for attempts to shorten the item pool through the use of computerized adaptive assessment approaches. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Mean, Minnesota Multiphasic Personality Inventory, Psychometrics, Statistical Correlation, Statistical Samples, Test Forms}, author = {Archer, R. P. and Tirrell, C. A. and Elkins, D. E.} } @article {2124, title = {An examination of conditioning variables used in computer adaptive testing for DIF analyses}, journal = {Applied Measurement in Education}, volume = {14}, year = {2001}, pages = {3-16}, author = {Walker, C. M. and Beretvas, S. N. and Ackerman, T. A.} } @conference {865, title = {An examination of item review on a CAT using the specific information item selection algorithm}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {PDF file, 325 KB}, address = {Seattle WA}, author = {Bowles, R. and Pommerich, M.} } @booklet {1338, title = {An examination of item review on computer adaptive tests}, year = {2001}, address = {Manuscript in preparation, University of Virginia}, author = {Bowles, R.} } @conference {1000, title = {An examination of item selection rules by stratified CAT designs integrated with content balancing methods}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 296 KB}}, address = {Seattle WA}, author = {Leung, C.-K. and Chang, Hua-Hua and Hau, K-T.} } @booklet {1357, title = {An examination of testlet scoring and item exposure constraints in the verbal reasoning section of the MCAT}, year = {2001}, address = {MCAT Monograph Series: Association of American Medical Colleges}, author = {Davis, L. L. and Dodd, B. G.} } @conference {897, title = {An examination of testlet scoring and item exposure constraints in the Verbal Reasoning section of the MCAT}, year = {2001}, note = {{PDF file, 653 KB}}, author = {Davis, L. L. and Dodd, B. G.} } @article {36, title = {An examination of the comparative reliability, validity, and accuracy of performance ratings made using computerized adaptive rating scales}, journal = {Journal of Applied Psychology}, volume = {86}, number = {5}, year = {2001}, note = {214803450021-9010Journal ArticleValidation Studies}, pages = {965-973}, abstract = {This laboratory research compared the reliability, validity, and accuracy of a computerized adaptive rating scale (CARS) format and 2 relatively common and representative rating formats. The CARS is a paired-comparison rating task that uses adaptive testing principles to present pairs of scaled behavioral statements to the rater to iteratively estimate a ratee{\textquoteright}s effectiveness on 3 dimensions of contextual performance. Videotaped vignettes of 6 office workers were prepared, depicting prescripted levels of contextual performance, and 112 subjects rated these vignettes using the CARS format and one or the other competing format. 
Results showed 23\%-37\% lower standard errors of measurement for the CARS format. In addition, validity was significantly higher for the CARS format (d = .18), and Cronbach{\textquoteright}s accuracy coefficients showed significantly higher accuracy, with a median effect size of .08. The discussion focuses on possible reasons for the results.}, keywords = {*Computer Simulation, *Employee Performance Appraisal, *Personnel Selection, Adult, Automatic Data Processing, Female, Human, Male, Reproducibility of Results, Sensitivity and Specificity, Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Task Performance and Analysis, Video Recording}, author = {Borman, W. C. and Buck, D. E. and Hanson, M. A. and Motowidlo, S. J. and Stark, S. and F Drasgow} } @book {1645, title = {The FastTEST Professional Testing System, Version 1.6 [Computer software]}, year = {2001}, address = {St. Paul MN: Author}, author = {Assessment-Systems-Corporation} } @article {90, title = {Final answer?}, journal = {American School Board Journal}, volume = {188}, number = {3}, year = {2001}, pages = {24-26}, abstract = {The Northwest Evaluation Association helped an Indiana school district develop a computerized adaptive testing system that was aligned with its curriculum and geared toward measuring individual student growth. Now the district can obtain such information from semester to semester and year to year, get immediate results, and test students on demand. (MLH)}, keywords = {computerized adaptive testing}, author = {Coyle, J.} } @conference {1021, title = {Impact of item location effects on ability estimation in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {$\#$LI01-01}, address = {Seattle WA}, author = {Liu, M. and Zhu, R. and Guo, F.} } @conference {1284, title = {Impact of scoring options for not reached items in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {PDF file, 232 K}, address = {Seattle WA}, author = {Yi, Q. and Widiatmo, H. and Ban, J-C. and Harris, D. J.} } @booklet {1582, title = {Impact of several computer-based testing variables on the psychometric properties of credentialing examinations (Laboratory of Psychometric and Evaluative Research Report No 393)}, year = {2001}, address = {Amherst, MA: University of Massachusetts, School of Education.}, author = {Xing, D. and Hambleton, R. K.} } @conference {1273, title = {Impact of several computer-based testing variables on the psychometric properties of credentialing examinations}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Xing, D. and Hambleton, R. K.} } @booklet {1547, title = {Implementing constrained CAT with shadow tests for large item pools}, year = {2001}, address = {Submitted for publication}, author = {Veldkamp, B. P.} } @booklet {1345, title = {Implementing content constraints in a-stratified adaptive testing using a shadow test approach (Research Report 01-001)}, year = {2001}, address = {University of Twente, Department of Educational Measurement and Data Analysis}, author = {Chang, Hua-Hua and van der Linden, W. 
J.} } @conference {1235, title = {The influence of item characteristics and administration position on CAT Scores}, booktitle = {Paper presented at the 33rd annual meeting of the Northeastern Educational Research Association}, year = {2001}, address = {Hudson Valley, NY, October 26, 2001}, author = {Wang, L. and Gawlick, L.} } @conference {1001, title = {Integrating stratification and information approaches for multiple constrained CAT}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, note = {{PDF file, 322 KB}}, address = {Seattle WA}, author = {Leung, C.-I. and Chang, Hua-Hua and Hau, K-T.} } @conference {1159, title = {An investigation of procedures for estimating error indexes in proficiency estimation in a realistic second-order equitable CAT environment}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, address = {Seattle WA}, author = {Shyu, C.-Y. and Fan, M. and Thompson, T, and Hsu.} } @conference {957, title = {An investigation of the impact of items that exhibit mild DIF on ability estimation in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Jennings, J. A. and Dodd, B. G. and Fitzpatrick, S.} } @booklet {1604, title = {Item and passage selection algorithm simulations for a computerized adaptive version of the verbal section of the Medical College Admission Test (MCAT)}, year = {2001}, address = {MCAT Monograph Series}, author = {Smith, R. W. and Plake, B. S. and De Ayala, R. J.,} } @conference {1112, title = {Item pool design for computerized adaptive tests}, booktitle = {Invited small group session at the 6th Conference of the European Association of Psychological Assessment}, year = {2001}, address = {Aachen, Germany}, author = {Reckase, M. D.} } @inbook {385, title = {Item response theory applied to combinations of multiple-choice and constructed-response items--approximation methods for scale scores}, booktitle = {Test scoring}, year = {2001}, note = {Using Smart Source ParsingTest scoring. (pp. 293-341). Mahwah, NJ : Lawrence Erlbaum Associates, Publishers. xii, 422 pp}, pages = {289-315}, publisher = {Lawrence Erlbaum Associates}, organization = {Lawrence Erlbaum Associates}, chapter = {8}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) The authors develop approximate methods that replace the scoring tables with weighted linear combinations of the component scores. Topics discussed include: a linear approximation for the extension to combinations of scores; the generalization of two or more scores; potential applications of linear approximations to item response theory in computerized adaptive tests; and evaluation of the pattern-of-summed-scores, and Gaussian approximation, estimates of proficiency. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Item Response Theory, Method), Multiple Choice (Testing, Scoring (Testing), Statistical Estimation, Statistical Weighting, Test Items, Test Scores}, author = {Thissen, D. and Nelson, L. A. and Swygert, K. 
A.} } @article {188, title = {Item selection in computerized adaptive testing: Should more discriminating items be used first?}, journal = {Journal of Educational Measurement}, volume = {38}, number = {3}, year = {2001}, pages = {249-266}, abstract = {During computerized adaptive testing (CAT), items are selected continuously according to the test-taker{\textquoteright}s estimated ability. Test security has become a problem because high-discrimination items are more likely to be selected and become overexposed. So, there seems to be a tradeoff between high efficiency in ability estimations and balanced usage of items. This series of four studies addressed the dilemma by focusing on the notion of whether more or less discriminating items should be used first in CAT. The first study demonstrated that the common maximum information method with J. B. Sympson and R. D. Hetter (1985) control resulted in the use of more discriminating items first. The remaining studies showed that using items in the reverse order, as described in H. Chang and Z. Yings (1999) stratified method had potential advantages: (a) a more balanced item usage and (b) a relatively stable resultant item pool structure with easy and inexpensive management. This stratified method may have ability-estimation efficiency better than or close to that of other methods. It is argued that the judicious selection of items, as in the stratified method, is a more active control of item exposure. (PsycINFO Database Record (c) 2005 APA )}, keywords = {ability, Adaptive Testing, Computer Assisted Testing, Estimation, Statistical, Test Items computerized adaptive testing}, author = {Hau, Kit-Tai and Chang, Hua-Hua} } @article {482, title = {On maximizing item information and matching difficulty with ability}, journal = {Psychometrika}, volume = {66}, year = {2001}, pages = {69-77}, author = {Bickel, P. and Buyske, S. and Chang, Hua-Hua and Ying, Z.} } @conference {1245, title = {Measurement efficiency of multidimensional computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Psychological Association}, year = {2001}, address = {San Francisco CA}, author = {Wang, W-C. and Chen, B.-H.} } @conference {1149, title = {Measuring test compromise in high-stakes computerized adaptive testing: A Bayesian Strategy for surrogate test-taker detection}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {{PDF file, 275 KB}}, address = {Seattle WA}, author = {Segall, D. O.} } @booklet {1623, title = {A method for building a realistic model of test taker behavior for computerized adaptive testing (Research Report 01-22)}, year = {2001}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L. and Steffen, M. and Eignor, D. R.} } @conference {1064, title = {Methods to test invariant ability across subgroups of items in CAT}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Meijer, R. R.} } @article {773, title = {A minimax procedure in the context of sequential testing problems in psychodiagnostics}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {54}, year = {2001}, note = {$\#$VO01139 Vos, H J (2001) A minimax procedure in the context of sequential testing problems}, pages = {139-159}, author = {Vos, H. 
J.} } @conference {929, title = {Modeling variability in item parameters in CAT}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Glas, C. A. W. and van der Linden, W. J.} } @conference {1173, title = {Monitoring items for changes in performance in computerized adaptive tests}, booktitle = {Paper presented at the annual conference of the National Council on Measurement in Education}, year = {2001}, address = {Seattle, Washington}, author = {Smith, R. L. and Wang, M.M. and Wingersky, M. and Zhao, C.} } @conference {1120, title = {A monte carlo study of the feasibility of on-the-fly assessment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Revuelta, J. and Bejar, I. I. and Stocking, M.} } @article {392, title = {Multidimensional adaptive testing using the weighted likelihood estimation}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {61}, number = {12-A}, year = {2001}, pages = {4746}, abstract = {This study extended Warm{\textquoteright}s (1989) weighted likelihood estimation (WLE) to a multidimensional computerized adaptive test (MCAT) setting. WLE was compared with the maximum likelihood estimation (MLE), expected a posteriori (EAP), and maximum a posteriori (MAP) using a three-dimensional 3PL IRT model under a variety of computerized adaptive testing conditions. The dependent variables included bias, standard error of ability estimates (SE), square root of mean square error (RMSE), and test information. The independent variables were ability estimation methods, intercorrelation levels between dimensions, multidimensional structures, and ability combinations. Simulation results were presented in terms of descriptive statistics, such as figures and tables. In addition, inferential procedures were used to analyze bias by conceptualizing this Monte Carlo study as a statistical sampling experiment. The results of this study indicate that WLE and the other three estimation methods yield significantly more accurate ability estimates under an approximate simple test structure with one dominant dimension and several secondary dimensions. All four estimation methods, especially WLE, yield very large SEs when a three equally dominant multidimensional structure was employed. Consistent with previous findings based on unidimensional IRT model, MLE and WLE are less biased in the extreme of the ability scale; MLE and WLE yield larger SEs than the Bayesian methods; test information-based SEs underestimate actual SEs for both MLE and WLE in MCAT situations, especially at shorter test lengths; WLE reduced the bias of MLE under the approximate simple structure; test information-based SEs underestimates the actual SEs of MLE and WLE estimators in the MCAT conditions, similar to the findings of Warm (1989) in the unidimensional case. The results from the MCAT simulations did show some advantages of WLE in reducing the bias of MLE under the approximate simple structure with a fixed test length of 50 items, which was consistent with the previous research findings based on different unidimensional models. It is clear from the current results that all four methods perform very poorly when the multidimensional structures with multiple dominant factors were employed. 
More research efforts are urged to investigate systematically how different multidimensional structures affect the accuracy and reliability of ability estimation. Based on the simulated results in this study, there is no significant effect found on the ability estimation from the intercorrelation between dimensions. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Tseng, F-L.} } @conference {1207, title = {Multidimensional adaptive testing using weighted likelihood estimation: A comparison of estimation methods}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, note = {{PDF file, 988 KB}}, address = {Seattle WA}, author = {Tseng, F.-E. and Hsu, T.-C.} } @conference {1231, title = {Multidimensional IRT-based adaptive sequential mastery testing}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Vos, H. J. and Glas, C. E. W.} } @article {21, title = {NCLEX-RN performance: predicting success on the computerized examination}, journal = {Journal of Professional Nursing}, volume = {17}, number = {4}, year = {2001}, note = {8755-7223Journal Article}, month = {Jul-Aug}, pages = {158-165}, abstract = {Since the adoption of the Computerized Adaptive Testing (CAT) format of the National Certification Licensure Examination for Registered Nurses (NCLEX-RN), no studies have been reported in the literature on predictors of successful performance by baccalaureate nursing graduates on the licensure examination. In this study, a discriminant analysis was used to identify which of 21 variables can be significant predictors of success on the CAT NCLEX-RN. The convenience sample consisted of 289 individuals who graduated from a baccalaureate nursing program between 1995 and 1998. Seven significant predictor variables were identified. The total number of C+ or lower grades earned in nursing theory courses was the best predictor, followed by grades in several individual nursing courses. More than 93 per cent of graduates were correctly classified. Ninety-four per cent of NCLEX "passes" were correctly classified, as were 92 per cent of NCLEX failures. This degree of accuracy in classifying CAT NCLEX-RN failures represents a marked improvement over results reported in previous studies of licensure examinations, and suggests the discriminant function will be helpful in identifying future students in danger of failure. J Prof Nurs 17:158-165, 2001.}, keywords = {*Education, Nursing, Baccalaureate, *Educational Measurement, *Licensure, Adult, Female, Humans, Male, Predictive Value of Tests, Software}, author = {Beeman, P. B. and Waterhouse, J. K.} } @conference {1084, title = {Nearest neighbors, simple strata, and probabilistic parameters: An empirical comparison of methods for item exposure control in CATs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Parshall, C. G. and Kromrey, J. D. and Harmes, J. C. 
and Sentovich, C.} } @conference {881, title = {A new approach to simulation studies in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, note = {{PDF file, 251 KB}}, address = {Seattle WA}, author = {Chen, S-Y.} } @article {818, title = {A new computer algorithm for simultaneous test construction of two-stage and multistage testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, year = {2001}, pages = {180-198}, author = {Wu, I. L.} } @article {279, title = {Nouveaux d{\'e}veloppements dans le domaine du testing informatis{\'e} [New developments in the area of computerized testing]}, journal = {Psychologie Fran{\c c}aise}, volume = {46}, number = {3}, year = {2001}, pages = {221-230}, abstract = {L{\textquoteright}usage de l{\textquoteright}{\'e}valuation assist{\'e}e par ordinateur s{\textquoteright}est fortement d{\'e}velopp{\'e} depuis la premi{\`e}re formulation de ses principes de base dans les ann{\'e}es soixante et soixante-dix. Cet article offre une introduction aux derniers d{\'e}veloppements dans le domaine de l{\textquoteright}{\'e}valuation assist{\'e}e par ordinateur, en particulier celui du testing adaptative informatis{\'e}e (TAI). L{\textquoteright}estimation de l{\textquoteright}aptitude, la s{\'e}lection des items et le d{\'e}veloppement d{\textquoteright}une base d{\textquoteright}items dans le cas du TAI sont discut{\'e}s. De plus, des exemples d{\textquoteright}utilisations innovantes de l{\textquoteright}ordinateur dans des syst{\`e}mes int{\'e}gr{\'e}s de testing et de testing via Internet sont pr{\'e}sent{\'e}s. L{\textquoteright}article se termine par quelques illustrations de nouvelles applications du testing informatis{\'e} et des suggestions pour des recherches futures.Discusses the latest developments in computerized psychological assessment, with emphasis on computerized adaptive testing (CAT). Ability estimation, item selection, and item pool development in CAT are described. Examples of some innovative approaches to CAT are presented. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Applications, Computer Assisted, Diagnosis, Psychological Assessment computerized adaptive testing}, author = {Meijer, R. R. and Gr{\'e}goire, J.} } @conference {935, title = {On-line Calibration Using PARSCALE Item Specific Prior Method: Changing Test Population and Sample Size}, booktitle = {Paper presented at National Council on Measurement in Education Annual Meeting}, year = {2001}, address = {Seattle, Washington}, author = {Guo, F. and Stone, E. and Cruz, D.} } @booklet {1402, title = {Online item parameter recalibration: Application of missing data treatments to overcome the effects of sparse data conditions in a computerized adaptive version of the MCAT}, year = {2001}, note = {{PDF file, 406 KB}}, address = {Unpublished manuscript}, author = {Harmes, J. C. and Kromrey, J. D. and Parshall, C. G.} } @article {39, title = {Outlier measures and norming methods for computerized adaptive tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, number = {1}, year = {2001}, pages = {85-104}, abstract = {Notes that the problem of identifying outliers has 2 important aspects: the choice of outlier measures and the method to assess the degree of outlyingness (norming) of those measures. Several classes of measures for identifying outliers in Computerized Adaptive Tests (CATs) are introduced. 
Some of these measures are constructed to take advantage of CATs{\textquoteright} sequential choice of items; other measures are taken directly from paper and pencil (P\&P) tests and are used for baseline comparisons. Assessing the degree of outlyingness of CAT responses, however, can not be applied directly from P\&P tests because stopping rules associated with CATs yield examinee responses of varying lengths. Standard outlier measures are highly correlated with the varying lengths which makes comparison across examinees impossible. Therefore, 4 methods are presented and compared which map outlier statistics to a familiar probability scale (a p value). The methods are explored in the context of CAT data from a 1995 Nationally Administered Computerized Examination (NACE). (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Statistical Analysis, Test Norms}, author = {Bradlow, E. T. and Weiss, R. E.} } @booklet {1368, title = {Overexposure and underexposure of items in computerized adaptive testing (Measurement and Research Department Reports 2001-1)}, year = {2001}, note = {{PDF file, 276 KB}}, address = {Arnhem, The Netherlands: CITO Groep}, author = {Theo Eggen} } @article {383, title = {Pasado, presente y futuro de los test adaptativos informatizados: Entrevista con Isaac I. B{\'e}jar [Past, present and future of computerized adaptive testing: Interview with Isaac I. B{\'e}jar]}, journal = {Psicothema}, volume = {13}, number = {4}, year = {2001}, pages = {685-690}, abstract = {En este art{\'\i}culo se presenta el resultado de una entrevista con Isaac I. Bejar. El Dr. Bejar es actualmente Investigador Cient{\'\i}fico Principal y Director del Centro para el Dise{\~n}o de Evaluaci{\'o}n y Sistemas de Puntuaci{\'o}n perteneciente a la Divisi{\'o}n de Investigaci{\'o}n del Servicio de Medici{\'o}n Educativa (Educa - tional Testing Service, Princeton, NJ, EE.UU.). El objetivo de esta entrevista fue conversar sobre el pasado, presente y futuro de los Tests Adaptativos Informatizados. En la entrevista se recogen los inicios de los Tests Adaptativos y de los Tests Adaptativos Informatizados y {\'u}ltimos avances que se desarrollan en el Educational Testing Service sobre este tipo de tests (modelos generativos, isomorfos, puntuaci{\'o}n autom{\'a}tica de {\'\i}tems de ensayo{\textellipsis}). Se finaliza con la visi{\'o}n de futuro de los Tests Adaptativos Informatizados y su utilizaci{\'o}n en Espa{\~n}a.Past, present and future of Computerized Adaptive Testing: Interview with Isaac I. Bejar. In this paper the results of an interview with Isaac I. Bejar are presented. Dr. Bejar is currently Principal Research Scientist and Director of Center for Assessment Design and Scoring, in Research Division at Educational Testing Service (Princeton, NJ, U.S.A.). The aim of this interview was to review the past, present and future of the Computerized Adaptive Tests. The beginnings of the Adaptive Tests and Computerized Adaptive Tests, and the latest advances developed at the Educational Testing Service (generative response models, isomorphs, automated scoring{\textellipsis}) are reviewed. The future of Computerized Adaptive Tests is analyzed, and its utilization in Spain commented.}, keywords = {computerized adaptive testing}, isbn = {0214-9915}, author = {Tejada, R. 
and Antonio, J.} } @article {784, title = {Polytomous modeling of cognitive errors in computer adaptive testing}, journal = {Journal of Applied Measurement}, volume = {2}, number = {4}, year = {2001}, abstract = {Used Monte Carlo simulation to compare the relative measurement efficiency of polytomous modeling and dichotomous modeling under different scoring schemes and termination criteria. Results suggest that polytomous computerized adaptive testing (CAT) yields marginal gains over dichotomous CAT when termination criteria are more stringent. Discusses conditions under which polytomous CAT cannot prevent the nonuniform gain in test information. (SLD)}, author = {Wang, L.-S. and Li, C.-S.} } @conference {1098, title = {Pour une {\'e}valuation sur mesure des {\'e}tudiants : d{\'e}fis et enjeux du testing adaptatif [Tailored assessment of students: Challenges and stakes of adaptive testing]}, booktitle = {Communication pr{\'e}sent{\'e}e {\`a} l{\textquoteright}int{\'e}rieur de la 23e session d{\textquoteright}{\'e}tudes de l{\textquoteright}Association pour le d{\'e}veloppement de la mesure et de l{\textquoteright}{\'e}valuation en {\'e}ducation}, year = {2001}, note = {Qu{\'e}bec: ADM{\'E}{\'E}.}, address = {ADM{\'E}{\'E}}, author = {Ra{\^\i}che, G.} } @conference {1102, title = {Pour une {\'e}valuation sur mesure pour chaque {\'e}tudiant : d{\'e}fis et enjeux du testing adaptatif par ordinateur en {\'e}ducation [Tailored testing for each student~: Principles and stakes of computerized adaptive testing in education]}, booktitle = {Presented at the 23rd Study Session of the ADM{\'E}{\'E}. Qu{\'e}bec: Association pour le d{\'e}veloppement de la mesure et de l{\textquoteright}{\'e}valuation en {\'e}ducation (ADM{\'E}{\'E}).}, year = {2001}, author = {Ra{\^\i}che, G. and Blais, J. G. and Boiteau, N.} } @inbook {362, title = {Practical issues in setting standards on computerized adaptive tests}, booktitle = {Setting performance standards: Concepts, methods, and perspectives}, year = {2001}, note = {Setting performance standards: Concepts, methods, and perspectives (pp. 355-369). Mahwah, NJ: Lawrence Erlbaum Associates, Publishers. xiii, 510 pp}, pages = {355-369}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J. USA}, abstract = {(From the chapter) Examples of setting standards on computerized adaptive tests (CATs) are hard to find. Some examples of CATs involving performance standards include the registered nurse exam and the Novell systems engineer exam. Although CATs do not require separate standard-setting methods, there are special issues to be addressed by test specialists who set performance standards on CATs. Setting standards on a CAT will typically require modifications of the procedures used with more traditional, fixed-form, paper-and-pencil examinations. The purpose of this chapter is to illustrate why CATs pose special challenges to the standard setter. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Performance Tests, Testing Methods}, author = {Sireci, S. G. and Clauser, B.
E.} } @article {785, title = {Precision of Warm{\textquoteright}s weighted likelihood estimation of ability for a polytomous model in CAT}, journal = {Applied Psychological Measurement}, volume = {25}, year = {2001}, pages = {317-331}, author = {Wang, S., and Wang, T.} } @conference {1097, title = {Principes et enjeux du testing adaptatif : de la loi des petits nombres {\`a} la loi des grands nombres}, booktitle = {Communication pr{\'e}sent{\'e}e {\`a} l{\textquoteright}int{\'e}rieur du 69e congr{\`e}s de l{\textquoteright}Association canadienne fran{\c c}aise pour l{\textquoteright}avancement de la science}, year = {2001}, note = {Sherbrooke: Acfas.}, address = {Acfas}, author = {Ra{\^\i}che, G.} } @book {1689, title = {A rearrangement procedure for administering adaptive tests when review options are permitted}, year = {2001}, address = {Unpublished doctoral dissertation, Michigan State University}, author = {Papanastasiou, E. C.} } @booklet {1565, title = {Refining a system for computerized adaptive testing pool creation (Research Report 01-18)}, year = {2001}, address = {Princeton NJ: Educational Testing Service}, author = {Way, W. D. and Swanson, L. and Steffen, M. and Stocking, M. L.} } @conference {1251, title = {Refining a system for computerized adaptive testing pool creation}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2001}, address = {Seattle WA}, author = {Way, W. D. and Swanson, l, and Stocking, M.} } @article {297, title = {Requerimientos, aplicaciones e investigaci{\'o}n en tests adaptativos informatizados [Requirements, applications, and investigation in computerized adaptive testing]}, journal = {Apuntes de Psicologia}, volume = {19}, number = {1}, year = {2001}, pages = {11-28}, abstract = {Summarizes the main requirements and applications of computerized adaptive testing (CAT) with emphasis on the differences between CAT and conventional computerized tests. Psychometric properties of estimations based on CAT, item selection strategies, and implementation software are described. Results of CAT studies in Spanish-speaking samples are described. Implications for developing a CAT measuring the English vocabulary of Spanish-speaking students are discussed. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, English as Second Language, Psychometrics computerized adaptive testing}, author = {Olea D{\'\i}az, J. and Ponsoda Gil, V. and Revuelta Men{\'e}ndez, J. and Hontangas Beltr{\'a}n, P. and Abad, F. J.} } @booklet {1564, title = {Scoring alternatives for incomplete computerized adaptive tests (Research Report 01-20)}, year = {2001}, address = {Princeton NJ: Educational Testing Service}, author = {Way, W. D. and Gawlick, L. A. and Eignor, D. R.} } @booklet {1507, title = {STAR Early Literacy Computer-Adaptive Diagnostic Assessment: Technical Manual}, year = {2001}, address = {Wisconsin Rapids, WI: Author}, author = {Renaissance-Learning-Inc.} } @conference {1232, title = {A system for on-the-fly adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {Wagner, M. E.} } @article {670, title = {Test anxiety and test performance: Comparing paper-based and computer-adaptive versions of the Graduate Record Examinations (GRE) General test}, journal = {Journal of educational computing research}, volume = {24 }, year = {2001}, pages = {249-273.}, author = {Powers, D. 
E.} } @conference {1161, title = {Testing a computerized adaptive personality inventory using simulated response data}, booktitle = {Paper presented at the annual meeting of the American Psychological Association}, year = {2001}, address = {San Francisco CA}, author = {Simms, L.} } @booklet {1470, title = {Testing via the Internet: A literature review and analysis of issues for Department of Defense Internet testing of the Armed Services Vocational Aptitude Battery (ASVAB) in high schools (FR-01-12)}, year = {2001}, note = {{PDF file, 894 KB}}, address = {Alexandria VA: Human Resources Research Organization}, author = {J. R. McBride and Paddock, A. F. and Wise, L. L. and Strickland, W. J. and B. K. Waters} } @article {197, title = {Toepassing van een computergestuurde adaptieve testprocedure op persoonlijkheidsdata [Application of a computerised adaptive test procedure on personality data]}, journal = {Nederlands Tijdschrift voor de Psychologie en haar Grensgebieden}, volume = {56}, number = {3}, year = {2001}, pages = {119-133}, abstract = {Studied the applicability of a computerized adaptive testing procedure to an existing personality questionnaire within the framework of item response theory. The procedure was applied to the scores of 1,143 male and female university students (mean age 21.8 yrs) in the Netherlands on the Neuroticism scale of the Amsterdam Biographical Questionnaire (G. J. Wilde, 1963). The graded response model (F. Samejima, 1969) was used. The quality of the adaptive test scores was measured based on their correlation with test scores for the entire item bank and on their correlation with scores on other scales from the personality test. The results indicate that computerized adaptive testing can be applied to personality scales. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Applications, Computer Assisted Testing, Personality Measures, Test Reliability computerized adaptive testing}, author = {Hol, A. M. and Vorst, H. C. M. and Mellenbergh, G. J.} } @booklet {1560, title = {User{\textquoteright}s guide for SCORIGHT (version 1): A computer program for scoring tests built of testlets (Research Report 01-06)}, year = {2001}, note = {$\#$WA01-06 {PDF file, 2.349 MB}}, address = {Princeton NJ: Educational Testing Service.}, author = {Wang, X and Bradlow, E. T. and Wainer, H.,} } @booklet {1558, title = {Users guide for SCORIGHT (version 2) : A computer program for scoring tests built of testlets (Research Report 01-06)}, year = {2001}, address = {Princeton NJ: Educational Testing Service.}, author = {Wang, X and Bradlow, E. T. and Wainer, H.,} } @conference {1212, title = {Using response times to detect aberrant behavior in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2001}, address = {Seattle WA}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @article {576, title = {Validity issues in computer-based testing}, journal = {Educational Measurement: Issues and Practice}, volume = {20(3)}, year = {2001}, pages = {16-25}, author = {Huff, K. L. and Sireci, S. G.} } @booklet {1387, title = {Adaptive mastery testing using a multidimensional IRT model and Bayesian sequential decision theory (Research Report 00-06)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Glas, C. A. W. and Vos, H. 
J.} } @article {107, title = {Algoritmo mixto m{\'\i}nima entrop{\'\i}a-m{\'a}xima informaci{\'o}n para la selecci{\'o}n de {\'\i}tems en un test adaptativo informatizado [A mixed minimum entropy-maximum information algorithm for item selection in computerized adaptive testing]}, journal = {Psicothema}, volume = {12}, number = {2}, year = {2000}, pages = {12-14}, abstract = {El objetivo del estudio que presentamos es comparar la eficacia como estrategia de selecci{\'o}n de {\'\i}tems de tres algoritmos diferentes: a) basado en m{\'a}xima informaci{\'o}n; b) basado en m{\'\i}nima entrop{\'\i}a; y c) mixto, m{\'\i}nima entrop{\'\i}a en los {\'\i}tems iniciales y m{\'a}xima informaci{\'o}n en el resto; bajo la hip{\'o}tesis de que el algoritmo mixto puede dotar al TAI de mayor eficacia. Las simulaciones de procesos TAI se realizaron sobre un banco de 28 {\'\i}tems de respuesta graduada calibrado seg{\'u}n el modelo de Samejima, tomando como respuesta al TAI la respuesta original de los sujetos que fueron utilizados para la calibraci{\'o}n. Los resultados iniciales muestran c{\'o}mo el criterio mixto es m{\'a}s eficaz que cualquiera de los otros dos tomados independientemente. Dicha eficacia se maximiza cuando el algoritmo de m{\'\i}nima entrop{\'\i}a se restringe a la selecci{\'o}n de los primeros {\'\i}tems del TAI, ya que con las respuestas a estos primeros {\'\i}tems la estimaci{\'o}n de θ comienza a ser relevante y el algoritmo de m{\'a}xima informaci{\'o}n se optimiza. Item selection algorithms in computerized adaptive testing. The aim of this paper is to compare the efficacy of three different item selection algorithms in computerized adaptive testing (CAT). These algorithms are based as follows: the first one is based on item information, the second one on entropy, and the last algorithm is a mixture of the two previous ones. The CAT process was simulated using an emotional adjustment item bank. This item bank contains 28 graded items in six categories, calibrated using Samejima{\textquoteright}s (1969) Graded Response Model. The initial results show that the mixed-criterion algorithm performs better than the other ones.}, keywords = {computerized adaptive testing}, author = {Dorronsoro, J. R. and Santa-Cruz, C. and Rubio Franco, V. J. and Aguado Garc{\'\i}a, D.} } @conference {1204, title = {Applying specific information item selection to a passage-based test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans, LA, April}, author = {Thompson, T. D. and Davey, T.} } @conference {1240, title = {Assembling parallel item pools for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, note = {$\#$WA00-02}, address = {New Orleans}, author = {Wang, T. and Fan, M. and Yi, Q. and Ban, J. C. and Zhu, D.} } @article {402, title = {Capitalization on item calibration error in adaptive testing}, journal = {Applied Measurement in Education}, volume = {13}, number = {1}, year = {2000}, note = {Lawrence Erlbaum, US}, pages = {35-53}, abstract = {(from the journal abstract) In adaptive testing, item selection is sequentially optimized during the test. Because the optimization takes place over a pool of items calibrated with estimation error, capitalization on chance is likely to occur.
How serious the consequences of this phenomenon are depends not only on the distribution of the estimation errors in the pool or the conditional ratio of the test length to the pool size given ability, but may also depend on the structure of the item selection criterion used. A simulation study demonstrated a dramatic impact of capitalization on estimation errors on ability estimation. Four different strategies to minimize the likelihood of capitalization on error in computerized adaptive testing are discussed.}, keywords = {computerized adaptive testing}, author = {van der Linden, W. J. and Glas, C. A. W.} } @article {368, title = {CAT administration of language placement examinations}, journal = {Journal of Applied Measurement}, volume = {1}, number = {3}, year = {2000}, note = {1529-7713Journal Article}, pages = {292-302}, abstract = {This article describes the development of a computerized adaptive test for Cegep de Jonquiere, a community college located in Quebec, Canada. Computerized language proficiency testing allows the simultaneous presentation of sound stimuli as the question is being presented to the test-taker. With a properly calibrated bank of items, the language proficiency test can be offered in an adaptive framework. By adapting the test to the test-taker{\textquoteright}s level of ability, an assessment can be made with significantly fewer items. We also describe our initial attempt to detect instances in which "cheating low" is occurring. In the "cheating low" situation, test-takers deliberately answer questions incorrectly, questions that they are fully capable of answering correctly had they been taking the test honestly.}, keywords = {*Language, *Software, Aptitude Tests/*statistics \& numerical data, Educational Measurement/*statistics \& numerical data, Humans, Psychometrics, Reproducibility of Results, Research Support, Non-U.S. Gov{\textquoteright}t}, author = {Stahl, J. and Bergstrom, B. and Gershon, R. C.} } @inbook {1938, title = {Caveats, pitfalls, and unexpected consequences of implementing large-scale computerized testing}, year = {2000}, address = {Wainer, H. (Ed). Computerized adaptive testing: A primer (2nd ed.). pp. 271-299. Mahwah, NJ: Lawrence Erlbaum Associates.}, author = {Wainer, H., and Eignor, D. R.} } @booklet {1510, title = {CBTS: Computer-based testing simulation and analysis [computer software]}, year = {2000}, address = {Amherst, MA: University of Massachusetts, School of Education}, author = {Robin, F.} } @conference {986, title = {Change in distribution of latent ability with item position in CAT sequence}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education in New Orleans}, year = {2000}, note = {{PDF file, 103 KB}}, address = {LA}, author = {Krass, I. A.} } @article {573, title = {The choice of item difficulty in self adapted testing}, journal = {European Journal of Psychological Assessment}, volume = {16}, year = {2000}, pages = {3-12}, author = {Hontangas, P. and Ponsoda, V. and Olea, J. and Wise, S. L.} } @conference {1124, title = {Classification accuracy and test security for a computerized adaptive mastery test calibrated with different IRT models}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Robin, F. and Xing, D. and Scrams, D. 
and Potenza, M.} } @article {309, title = {A comparison of computerized adaptive testing and multistage testing}, journal = {Dissertation Abstracts International: Section B: The Sciences \& Engineering}, volume = {60}, number = {11-B}, year = {2000}, pages = {5829}, abstract = {There is considerable evidence to show that computerized adaptive testing (CAT) and multi-stage testing (MST) are viable frameworks for testing. With many testing organizations looking to move towards CAT or MST, it is important to know which framework is superior in different situations and at what cost in terms of measurement. What was needed was a comparison of the different testing procedures under various realistic testing conditions. This dissertation addressed the important problem of the increase or decrease in accuracy of ability estimation in using MST rather than CAT. The purpose of this study was to compare the accuracy of ability estimates produced by MST and CAT while keeping some variables fixed and varying others. A simulation study was conducted to investigate the effects of several factors on the accuracy of ability estimation using different CAT and MST designs. The factors that were manipulated were the number of stages, the number of subtests per stage, and the number of items per subtest. Kept constant were test length, distribution of subtest information, method of determining cut-points on subtests, amount of overlap between subtests, and method of scoring the total test. The primary question of interest was, given a fixed test length, how many stages and how many subtests per stage should there be to maximize measurement precision? Furthermore, how many items should there be in each subtest? Should there be more in the routing test or should there be more in the higher-stage tests? Results showed that, in general, increasing the number of stages from two to three decreased the amount of error in ability estimation. Increasing the number of subtests from three to five increased the accuracy of ability estimates as well as the efficiency of the MST designs relative to the P\&P and CAT designs at most ability levels (-.75 to 2.25). Finally, at most ability levels (-.75 to 2.25), varying the number of items per stage had little effect on either the resulting accuracy of ability estimates or the relative efficiency of the MST designs to the P\&P and CAT designs. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Patsula, L. N.} } @article {70, title = {A comparison of item selection rules at the early stages of computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, number = {3}, year = {2000}, pages = {241-255}, abstract = {The effects of 5 item selection rules--Fisher information (FI), Fisher interval information (FII), Fisher information with a posterior distribution (FIP), Kullback-Leibler information (KL), and Kullback-Leibler information with a posterior distribution (KLP)--were compared with respect to the efficiency and precision of trait (θ) estimation at the early stages of computerized adaptive testing (CAT). FII, FIP, KL, and KLP performed marginally better than FI at the early stages of CAT for θ=-3 and -2. For tests longer than 10 items, there appeared to be no precision advantage for any of the selection rules.
(PsycINFO Database Record (c) 2005 APA ) (journal abstract)}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Analysis (Test), Statistical Estimation computerized adaptive testing}, author = {Chen, S-Y. and Ankenmann, R. D. and Chang, Hua-Hua} } @article {502, title = {A comparison of item selection rules at the early stages of computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, year = {2000}, pages = {241-255}, author = {Chen, S.Y. and Ankenmann, R. D. and Chang, Hua-Hua} } @inbook {1835, title = {Computer-adaptive sequential testing}, year = {2000}, address = {W. J. van der Linden (Ed.), Computerized Adaptive Testing: Theory and Practice (pp. 289-209). Dordrecht, The Netherlands: Kluwer.}, author = {Luecht, RM and Nungester, R. J.} } @inbook {255, title = {Computer-adaptive testing: A methodology whose time has come}, booktitle = {Development of Computerised Middle School Achievement Tests}, volume = {69}, year = {2000}, publisher = {MESA}, organization = {MESA}, address = {Chicago, IL. USA}, keywords = {computerized adaptive testing}, author = {Linacre, J. M.}, editor = {Kang, U. and Jean, E. and Linacre, J. M.} } @booklet {1442, title = {Computer-adaptive testing: A methodology whose time has come. MESA Memorandum No 9}, year = {2000}, address = {Chicago : MESA psychometric laboratory, Unversity of Chicago.}, author = {Linacre, J. M.} } @article {329, title = {Computerization and adaptive administration of the NEO PI-R}, journal = {Assessment}, volume = {7}, number = {4}, year = {2000}, note = {1073-1911 (Print)Journal Article}, pages = {347-64}, abstract = {This study asks, how well does an item response theory (IRT) based computerized adaptive NEO PI-R work? To explore this question, real-data simulations (N = 1,059) were used to evaluate a maximum information item selection computerized adaptive test (CAT) algorithm. Findings indicated satisfactory recovery of full-scale facet scores with the administration of around four items per facet scale. Thus, the NEO PI-R could be reduced in half with little loss in precision by CAT administration. However, results also indicated that the CAT algorithm was not necessary. We found that for many scales, administering the "best" four items per facet scale would have produced similar results. In the conclusion, we discuss the future of computerized personality assessment and describe the role IRT methods might play in such assessments.}, keywords = {*Personality Inventory, Algorithms, California, Diagnosis, Computer-Assisted/*methods, Humans, Models, Psychological, Psychometrics/methods, Reproducibility of Results}, author = {Reise, S. P. and Henson, J. M.} } @article {607, title = {Computerized adaptive administration of the self-evaluation examination}, journal = {AANA.J}, volume = {68}, year = {2000}, pages = {226-31}, author = {LaVelle, T. and Zaglaniczny, K., and Spitzer, L.E.} } @booklet {1336, title = {Computerized adaptive rating scales (CARS): Development and evaluation of the concept}, year = {2000}, address = {(Institute Rep No. 350). Tampa FL: Personnel Decisions Research Institute.}, author = {Borman, W. C. and Hanson, M. A. and Kubisiak, U. C. and Buck, D. E.} } @book {1710, title = {Computerized adaptive testing: A primer (2nd edition)}, year = {2000}, address = {Hillsdale, N. J. : Lawrence Erlbaum Associates}, author = {Wainer, H., and Dorans, N. and Eignor, D. R. and Flaugher, R. and Green, B. F. and Mislevy, R. and Steinberg, L. 
and Thissen, D.} } @article {115, title = {Computerized adaptive testing for classifying examinees into three categories}, journal = {Educational and Psychological Measurement}, volume = {60}, number = {5}, year = {2000}, pages = {713-734}, abstract = {The objective of this study was to explore the possibilities for using computerized adaptive testing in situations in which examinees are to be classified into one of three categories. Testing algorithms with two different statistical computation procedures are described and evaluated. The first computation procedure is based on statistical testing and the other on statistical estimation. Item selection methods based on maximum information (MI) considering content and exposure control are considered. The measurement quality of the proposed testing algorithms is reported. The results of the study are that a reduction of at least 22\% in the mean number of items can be expected in a computerized adaptive test (CAT) compared to an existing paper-and-pencil placement test. Furthermore, statistical testing is a promising alternative to statistical estimation. Finally, it is concluded that imposing constraints on the MI selection strategy does not negatively affect the quality of the testing algorithms.}, keywords = {computerized adaptive testing, Computerized classification testing}, author = {Theo Eggen and Straetmans, G. J. J. M.} } @book {403, title = {Computerized adaptive testing: Theory and practice}, year = {2000}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dordrecht, The Netherlands}, author = {van der Linden, W. J. and Glas, C. A. W.} } @conference {1110, title = {Computerized testing {\textendash} the adolescent years: Juvenile delinquent or positive role model}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Reckase, M. D.} } @inbook {1922, title = {Constrained adaptive testing with shadow tests}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (eds.), Computerized adaptive testing: Theory and practice (pp. 27-52). Norwell MA: Kluwer.}, author = {van der Linden, W. J.} } @book {1664, title = {The construction and evaluation of a dynamic computerised adaptive test for the measurement of learning potential}, year = {2000}, address = {Unpublished D. Litt et Phil dissertation. University of South Africa, Pretoria.}, author = {De Beer, M.} } @conference {998, title = {Content balancing in stratified computerized adaptive testing designs}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, note = {{PDF file, 427 KB}}, address = {New Orleans, LA}, author = {Leung, C.-K. and Chang, Hua-Hua and Hau, K.-T.} } @inbook {1925, title = {Cross-validating item parameter estimation in adaptive testing}, year = {2000}, address = {A. Boomsma, M. A. J. van Duijn, and T. A. B. Snijders (Eds.), Essays on item response theory (pp. 205-219). New York: Springer.}, author = {van der Linden, W. J. and Glas, C. A. W.} } @inbook {416, title = {Designing item pools for computerized adaptive testing}, booktitle = {Computerized adaptive testing: Theory and practice}, year = {2000}, pages = {149{\textendash}162}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dordrecht, The Netherlands}, author = {Veldkamp, B. P. and van der Linden, W.
J.} } @inbook {410, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, booktitle = {Computerized adaptive testing: Theory and practice}, year = {2000}, pages = {201-219}, publisher = {Kluwer Academic}, organization = {Kluwer Academic}, address = {Dordrecht, The Netherlands}, keywords = {Person Fit}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @inbook {1814, title = {Detecting person misfit in adaptive testing using statistical process control techniques}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (Editors). Computerized Adaptive Testing: Theory and Practice. Norwell MA: Kluwer.}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {1091, title = {Detecting test-takers who have memorized items in computerized-adaptive testing and multi-stage testing: A comparison}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Patsula, L. N. and McLeod, L. D.} } @article {755, title = {Detection of known items in adaptive testing with a statistical quality control method}, journal = {Journal of Educational and Behavioral Statistics}, volume = {25}, year = {2000}, pages = {373-389}, author = {Veerkamp, W. J. J. and Glas, C. A. W.} } @booklet {1543, title = {Detection of person misfit in computerized adaptive testing with polytomous items (Research Report 00-01)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1511, title = {Development and evaluation of test assembly procedures for computerized adaptive testing (Laboratory of Psychometric and Evaluative Methods Research Report No 391)}, year = {2000}, address = {Amherst MA: University of Massachusetts, School of Education.}, author = {Robin, F.} } @article {378, title = {The development of a computerized version of Vandenberg{\textquoteright}s mental rotation test and the effect of visuo-spatial working memory loading}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {60}, number = {11-A}, year = {2000}, pages = {3938}, abstract = {This dissertation focused on the generation and evaluation of web-based versions of Vandenberg{\textquoteright}s Mental Rotation Test. Memory and spatial visualization theory were explored in relation to the addition of a visuo-spatial working memory component. Analysis of the data determined that there was a significant difference between scores on the MRT Computer and MRT Memory test. The addition of a visuo-spatial working memory component did significantly affect results at the .05 alpha level. Reliability and discrimination estimates were higher on the MRT Memory version. The computerization of the paper-and-pencil version of the MRT did not significantly affect scores but did affect the time required to complete the test. The population utilized in the quasi-experiment consisted of 107 university students from eight institutions in engineering graphics-related courses. The subjects completed two researcher-developed, Web-based versions of Vandenberg{\textquoteright}s Mental Rotation Test and the original paper-and-pencil version of the Mental Rotation Test. One version of the test included a visuo-spatial working memory loading.
Significant contributions of this study included developing and evaluating computerized versions of Vandenberg{\textquoteright}s Mental Rotation Test. Previous versions of Vandenberg{\textquoteright}s Mental Rotation Test did not take advantage of the ability of the computer to incorporate an interaction factor, such as a visuo-spatial working memory loading, into the test. The addition of an interaction factor results in a more discriminating test, which will lend itself well to computerized adaptive testing practices. Educators in engineering graphics-related disciplines should strongly consider the use of spatial visualization tests to aid in establishing the effects of modern computer systems on fundamental design/drafting skills. Regular testing of spatial visualization skills will assist in the creation of a more relevant curriculum. Computerized tests that are valid and reliable will assist in making this task feasible. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Computer Assisted Testing, Mental Rotation, Short Term Memory, computerized adaptive testing, Test Construction, Test Validity, Visuospatial Memory}, author = {Strong, S. D.} } @article {351, title = {Diagnostische Programme in der Demenzfr{\"u}herkennung: Der Adaptive Figurenfolgen-Lerntest (ADAFI) [Diagnostic programs in the early detection of dementia: The Adaptive Figure Series Learning Test (ADAFI)]}, journal = {Zeitschrift f{\"u}r Gerontopsychologie \& -Psychiatrie}, volume = {13}, number = {1}, year = {2000}, pages = {16-29}, abstract = {Zusammenfassung: Untersucht wurde die Eignung des computergest{\"u}tzten Adaptiven Figurenfolgen-Lerntests (ADAFI), zwischen gesunden {\"a}lteren Menschen und {\"a}lteren Menschen mit erh{\"o}htem Demenzrisiko zu differenzieren. Der im ADAFI vorgelegte Aufgabentyp der fluiden Intelligenzdimension (logisches Auff{\"u}llen von Figurenfolgen) hat sich in mehreren Studien zur Erfassung des intellektuellen Leistungspotentials (kognitive Plastizit{\"a}t) {\"a}lterer Menschen als g{\"u}nstig f{\"u}r die genannte Differenzierung erwiesen. Aufgrund seiner Konzeption als Diagnostisches Programm f{\"a}ngt der ADAFI allerdings einige Kritikpunkte an Vorgehensweisen in diesen bisherigen Arbeiten auf. Es konnte gezeigt werden, a) da{\ss} mit dem ADAFI deutliche Lokationsunterschiede zwischen den beiden Gruppen darstellbar sind, b) da{\ss} mit diesem Verfahren eine gute Vorhersage des mentalen Gesundheitsstatus der Probanden auf Einzelfallebene gelingt (Sensitivit{\"a}t: 80 \%, Spezifit{\"a}t: 90 \%), und c) da{\ss} die Vorhersageleistung statusdiagnostischer Tests zur Informationsverarbeitungsgeschwindigkeit und zum Arbeitsged{\"a}chtnis geringer ist. Die Ergebnisse weisen darauf hin, da{\ss} die plastizit{\"a}tsorientierte Leistungserfassung mit dem ADAFI vielversprechend f{\"u}r die Fr{\"u}hdiagnostik dementieller Prozesse sein k{\"o}nnte. The aim of this study was to examine the ability of the computerized Adaptive Figure Series Learning Test (ADAFI) to differentiate between old subjects at risk for dementia and old healthy controls. Several studies on the subject of measuring the intellectual potential (cognitive plasticity) of old subjects have shown the usefulness of the fluid intelligence type of task used in the ADAFI (completion of figure series) for this differentiation. Because the ADAFI has been developed as a Diagnostic Program, it is able to counter some critical issues in those studies.
It was shown a) that distinct differences between both groups are revealed by the ADAFI, b) that the prediction of the cognitive health status of individual subjects is quite good (sensitivity: 80 \%, specifity: 90 \%), and c) that the prediction of the cognitive health status with tests of processing speed and working memory is worse than with the ADAFI. The results indicate that the ADAFI might be a promising plasticity-oriented tool for the measurement of cognitive decline in the elderly, and thus might be useful for the early detection of dementia.}, keywords = {Adaptive Testing, At Risk Populations, Computer Assisted Diagnosis, Dementia}, author = {Schreiber, M. D. and Schneider, R. J. and Schweizer, A. and Beckmann, J. F. and Baltissen, R.} } @article {646, title = {Does adaptive testing violate local independence?}, journal = {Psychometrika}, volume = {65}, year = {2000}, pages = {149-156}, author = {Mislevy, R. J. and Chang, Hua-Hua} } @booklet {1440, title = {Effects of item-selection criteria on classification testing with the sequential probability ratio test (Research Report 2000-8)}, year = {2000}, note = {$\#$LI00-8}, address = {Iowa City, IA: American College Testing}, author = {Lin, C.-J. and Spray, J. A.} } @conference {837, title = {Effects of nonequivalence of item pools on ability estimates in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, note = {PDF file, 657 K}, address = {New Orleans LA}, author = {Ban, J. C. and Wang, T. and Yi, Q. and Harris, D. J.} } @article {179, title = {Emergence of item response modeling in instrument development and data analysis}, journal = {Medical Care}, volume = {38}, number = {Suppl. 9}, year = {2000}, pages = {II60-II65}, keywords = {Computer Assisted Testing, Health, Item Response Theory, Measurement, Statistical Validity computerized adaptive testing, Test Construction, Treatment Outcomes}, author = {Hambleton, R. K.} } @booklet {1344, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (ACT Research 2000-4)}, year = {2000}, address = {Iowa City IA, ACT, Inc}, author = {Chang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @article {205, title = {Estimating Item Parameters from Classical Indices for Item Pool Development with a Computerized Classification Test. }, number = {Research Report 2000-4}, year = {2000}, institution = {ACT, Inc.}, address = {Iowa City, Iowa}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J. A.} } @booklet {1409, title = {Estimating item parameters from classical indices for item pool development with a computerized classification test (Research Report 2000-4)}, year = {2000}, address = {Iowa City IA: ACT Inc}, author = {Huang, C.-Y. and Kalohn, J.C. and Lin, C.-J. and Spray, J.} } @article {74, title = {Estimation of trait level in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {24}, number = {3}, year = {2000}, pages = {257-265}, abstract = {Notes that in computerized adaptive testing (CAT), a examinee{\textquoteright}s trait level (θ) must be estimated with reasonable accuracy based on a small number of item responses. A successful implementation of CAT depends on (1) the accuracy of statistical methods used for estimating θ and (2) the efficiency of the item-selection criterion. 
Methods of estimating θ suitable for CAT are reviewed, and the differences between Fisher and Kullback-Leibler information criteria for selecting items are discussed. The accuracy of different CAT algorithms was examined in an empirical study. The results show that correcting θ estimates for bias was necessary at earlier stages of CAT, but most CAT algorithms performed equally well for tests of 10 or more items. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Item Analysis (Statistical), Statistical Estimation, computerized adaptive testing}, author = {Cheng, P. E. and Liou, M.} } @article {494, title = {ETS finds flaws in the way online GRE rates some students}, journal = {Chronicle of Higher Education}, volume = {47}, year = {2000}, pages = {A47}, author = {Carlson, S.} } @conference {898, title = {An examination of exposure control and content balancing restrictions on item selection in CATs using the partial credit model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, address = {New Orleans, LA}, author = {Davis, L. L. and Pastor, D. A. and Dodd, B. G. and Chiang, C. and Fitzpatrick, S.} } @article {41, title = {An examination of the reliability and validity of performance ratings made using computerized adaptive rating scales}, journal = {Dissertation Abstracts International: Section B: The Sciences and Engineering}, volume = {61}, number = {1-B}, year = {2000}, pages = {570}, abstract = {This study compared the psychometric properties of performance ratings made using recently developed computerized adaptive rating scales (CARS) to the psychometric properties of ratings made using more traditional paper-and-pencil rating formats, i.e., behaviorally-anchored and graphic rating scales. Specifically, the reliability, validity and accuracy of the performance ratings from each format were examined. One hundred twelve participants viewed six 5-minute videotapes of office situations and rated the performance of a target person in each videotape on three contextual performance dimensions (Personal Support, Organizational Support, and Conscientious Initiative) using CARS and either behaviorally-anchored or graphic rating scales. Performance rating properties were measured using Shrout and Fleiss{\textquoteright}s intraclass correlation (2, 1), Borman{\textquoteright}s differential accuracy measure, and Cronbach{\textquoteright}s accuracy components as indexes of rating reliability, validity, and accuracy, respectively. Results found that performance ratings made using the CARS were significantly more reliable and valid than performance ratings made using either of the other formats. Additionally, CARS yielded more accurate performance ratings than the paper-and-pencil formats. The nature of the CARS system (i.e., its adaptive nature and scaling methodology) and its paired comparison judgment task are offered as possible reasons for the differences found in the psychometric properties of the performance ratings made using the various rating formats. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Performance Tests, Rating Scales, Reliability, Test, Test Validity}, author = {Buck, D.
E.} } @article {364, title = {An exploratory analysis of item parameters and characteristics that influence item level response time}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {61}, number = {5-A}, year = {2000}, pages = {1812}, abstract = {This research examines the relationship between item level response time and (1) item discrimination, (2) item difficulty, (3) word count, (4) item type, and (5) whether a figure is included in an item. Data are from the Graduate Management Admission Test, which is currently offered only as a computerized adaptive test. Analyses revealed significant differences in response time between the five item types: problem solving, data sufficiency, sentence correction, critical reasoning, and reading comprehension. For this reason, the planned pairwise and complex analyses were run within each item type. Pairwise curvilinear regression analyses explored the relationship between response time and item discrimination, item difficulty, and word count. Item difficulty significantly contributed to the prediction of response time for each item type; two of the relationships were significantly quadratic. Item discrimination significantly contributed to the prediction of response time for only two of the item types; one revealed a quadratic relationship and the other a cubic relationship. Word count had significant linear relationship with response time for all the item types except reading comprehension, for which there was no significant relationship. Multiple regression analyses using word count, item difficulty, and item discrimination predicted between 35.4\% and 71.4\% of the variability in item response time across item types. The results suggest that response time research should consider the type of item that is being administered and continue to explore curvilinear relationships between response time and its predictor variables. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Item Analysis (Statistical), Item Response Theory, Problem Solving, Reaction Time, Reading Comprehension, Reasoning}, author = {Smith, Russell Winsor} } @booklet {1619, title = {A framework for comparing adaptive test designs}, year = {2000}, address = {Unpublished manuscript}, author = {Stocking, M. L.} } @conference {1093, title = {From simulation to application: Examinees react to computerized testing}, booktitle = {Paper presented at the annual meeting of the National Council of Measurement in Education}, year = {2000}, address = {New Orleans, April 2000}, author = {Pommerich, M and Burden, T.} } @inbook {1858, title = {The GRE computer adaptive test: Operational issues}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (Eds.), Computerized adaptive testing: Theory and practice (pp. 75-99). Dordrecht, Netherlands: Kluwer.}, author = {Mills, C. N. and Steffen, M.} } @article {302, title = {The impact of receiving the same items on consecutive computer adaptive test administrations}, journal = {Journal of Applied Measurement}, volume = {1}, number = {2}, year = {2000}, note = {Richard M Smith, US}, pages = {131-151}, abstract = {Addresses item exposure in a Computerized Adaptive Test (CAT) when the item selection algorithm is permitted to present examinees with questions that they have already been asked in a previous test administration. The data were from a national certification exam in medical technology. The responses of 178 repeat examinees were compared. 
The results indicate that the combined use of an adaptive algorithm to select items and latent trait theory to estimate person ability provides substantial protection from score contamination. The implications for constraints that prohibit examinees from seeing an item twice are discussed. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {O{\textquoteright}Neill, T. and Lunz, M. E. and Thiede, K.} } @conference {1028, title = {Implementing the computer-adaptive sequential testing (CAST) framework to mass produce high quality computer-adaptive and mastery tests}, booktitle = {Symposium paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans, LA}, author = {Luecht, RM} } @article {409, title = {An integer programming approach to item bank design}, journal = {Applied Psychological Measurement}, volume = {24}, number = {2}, year = {2000}, pages = {139-150}, abstract = {An integer programming approach to item bank design is presented that can be used to calculate an optimal blueprint for an item bank, in order to support an existing testing program. The results are optimal in that they minimize the effort involved in producing the items as revealed by current item writing patterns. Also presented is an adaptation of the models, which can be used as a set of monitoring tools in item bank management. The approach is demonstrated empirically for an item bank that was designed for the Law School Admission Test. }, keywords = {Aptitude Measures, Item Analysis (Test), Item Response Theory, Test Construction, Test Items}, author = {van der Linden, W. J. and Veldkamp, B. P. and Reese, L. M.} } @booklet {1622, title = {An investigation of approaches to computerizing the GRE subject tests (GRE Board Professional Report No 93-08P; Educational Testing Service Research Report 00-4)}, year = {2000}, note = {$\#$ST00-01}, address = {Princeton NJ: Educational Testing Service.}, author = {Stocking, M. L. and Smith, R. and Swanson, L.} } @inbook {1774, title = {Item calibration and parameter drift}, year = {2000}, address = {W. J. van der linden and C. A. W. Glas (Eds.). Computerized adaptive testing: Theory and practice (pp.183-199). Norwell MA: Kluwer Academic.}, author = {Glas, C. A. W.} } @article {657, title = {Item exposure control in computer-adaptive testing: The use of freezing to augment stratification}, journal = {Florida Journal of Educational Research}, volume = {40}, year = {2000}, pages = {28-52}, author = {Parshall, C. and Harmes, J. C. and Kromrey, J. D.} } @inbook {1768, title = {Item pools}, year = {2000}, address = {Wainer, H. (2000). Computerized adaptive testing: a primer. Mahwah, NJ: Erlbaum.}, author = {Flaugher, R.} } @article {191, title = {Item response theory and health outcomes measurement in the 21st century}, journal = {Medical Care}, volume = {38}, number = {9 Suppl II}, year = {2000}, note = {204349670025-7079Journal Article}, pages = {II28-II42}, abstract = {Item response theory (IRT) has a number of potential advantages over classical test theory in assessing self-reported health outcomes. IRT models yield invariant item and latent trait estimates (within a linear transformation), standard errors conditional on trait level, and trait estimates anchored to item content. 
IRT also facilitates evaluation of differential item functioning, inclusion of items with different response formats in the same scale, and assessment of person fit and is ideally suited for implementing computer adaptive testing. Finally, IRT methods can be helpful in developing better health outcome measures and in assessing change over time. These issues are reviewed, along with a discussion of some of the methodological and practical challenges in applying IRT methods.}, keywords = {*Models, Statistical, Activities of Daily Living, Data Interpretation, Statistical, Health Services Research/*methods, Health Surveys, Human, Mathematical Computing, Outcome Assessment (Health Care)/*methods, Research Design, Support, Non-U.S. Gov{\textquoteright}t, Support, U.S. Gov{\textquoteright}t, P.H.S., United States}, author = {Hays, R. D. and Morales, L. S. and Reise, S. P.} } @article {145, title = {Item selection algorithms in computerized adaptive testing}, journal = {Psicothema}, volume = {12}, number = {Suppl 2}, year = {2000}, note = {Spanish title: Algoritmo mixto m{\'\i}nima entrop{\'\i}a-m{\'a}xima informaci{\'o}n para la selecci{\'o}n de {\'\i}tems en un test adaptativo informatizado. Universidad de Oviedo, Spain}, pages = {12-14}, abstract = {Studied the efficacy of 3 different item selection algorithms in computerized adaptive testing. Ss were 395 university students (aged 20-25 yrs) in Spain. Ss were asked to submit answers via computer to 28 items of a personality questionnaire using item selection algorithms based on maximum item information, entropy, or mixed item-entropy algorithms. The results were evaluated according to ability of Ss to use item selection algorithms and number of questions. Initial results indicate that mixed criteria algorithms were more efficient than information or entropy algorithms for up to 15 questionnaire items, but that differences in efficiency decreased with increasing item number. Implications for developing computer adaptive testing methods are discussed. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Garcia, David A. and Santa Cruz, C. and Dorronsoro, J. R. and Rubio Franco, V. J.} } @inbook {404, title = {Item selection and ability estimation in adaptive testing}, booktitle = {Computerized adaptive testing: Theory and practice}, year = {2000}, pages = {1{\textendash}25}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dordrecht, The Netherlands}, author = {van der Linden, W. J. and Pashley, P. J.} } @book {1693, title = {La distribution d{\textquoteright}{\'e}chantillonnage en testing adaptatif en fonction de deux r{\`e}gles d{\textquoteright}arr{\^e}t : selon l{\textquoteright}erreur type et selon le nombre d{\textquoteright}items administr{\'e}s [Sampling distribution of the proficiency estimate in computerized adaptive testing according to two stopping...}, year = {2000}, address = {Doctoral thesis, Montreal: University of Montreal}, author = {Ra{\^\i}che, G.} } @article {232, title = {Lagrangian relaxation for constrained curve-fitting with binary variables: Applications in educational testing}, journal = {Dissertation Abstracts International Section A: Humanities and Social Sciences}, volume = {61}, number = {3-A}, year = {2000}, pages = {1063}, abstract = {This dissertation offers a mathematical programming approach to curve fitting with binary variables. Various Lagrangian Relaxation (LR) techniques are applied to constrained curve fitting. Applications in educational testing with respect to test assembly are utilized.
In particular, techniques are applied to both static exams (i.e. conventional paper-and-pencil (P\&P)) and adaptive exams (i.e. a hybrid computerized adaptive test (CAT) called a multiple-forms structure (MFS)). This dissertation focuses on the development of mathematical models to represent these test assembly problems as constrained curve-fitting problems with binary variables and solution techniques for the test development. Mathematical programming techniques are used to generate parallel test forms with item characteristics based on item response theory. A binary variable is used to represent whether or not an item is present on a form. The problem of creating a test form is modeled as a network flow problem with additional constraints. In order to meet the target information and the test characteristic curves, a Lagrangian relaxation heuristic is applied to the problem. The Lagrangian approach works by multiplying the constraint by a "Lagrange multiplier" and adding it to the objective. By systematically varying the multiplier, the test form curves approach the targets. This dissertation explores modifications to Lagrangian Relaxation as it is applied to the classical paper-and-pencil exams. For the P\&P exams, LR techniques are also utilized to include additional practical constraints to the network problem, which limit the item selection. An MFS is a type of a computerized adaptive test. It is a hybrid of a standard CAT and a P\&P exam. The concept of an MFS will be introduced in this dissertation, as well as, the application of LR as it is applied to constructing parallel MFSs. The approach is applied to the Law School Admission Test for the assembly of the conventional P\&P test as well as an experimental computerized test using MFSs. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Analysis, Educational Measurement, Mathematical Modeling, Statistical}, author = {Koppel, N. B.} } @book {1662, title = {Learning Potential Computerised Adaptive Test (LPCAT): Technical Manual}, year = {2000}, note = {$\#$deBE00-01}, address = {Pretoria: UNISA}, author = {De Beer, M.} } @book {1663, title = {Learning Potential Computerised Adaptive Test (LPCAT): User{\textquoteright}s Manual}, year = {2000}, note = {$\#$deBE00-02}, address = {Pretoria: UNISA}, author = {De Beer, M.} } @article {764, title = {Limiting answer review and change on computerized adaptive vocabulary tests: Psychometric and attitudinal results}, journal = {Journal of Educational Measurement}, volume = {37}, year = {2000}, pages = {21-38}, author = {Vispoel, W. P. and Hendrickson, A. B. and Bleiler, T.} } @article {203, title = {Los tests adaptativos informatizados en la frontera del siglo XXI: Una revisi{\'o}n [Computerized adaptive tests at the turn of the 21st century: A review]}, journal = {Metodolog{\'\i}a de las Ciencias del Comportamiento}, volume = {2}, number = {2}, year = {2000}, pages = {183-216}, keywords = {computerized adaptive testing}, isbn = {1575-9105}, author = {Hontangas, P. and Ponsoda, V. and Olea, J. and Abad, F. J.} } @inbook {1897, title = {Methods of controlling the exposure of items in CAT}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (eds.), Computerized adaptive testing: Theory and practice (pp. 163-182). Norwell MA: Kluwer.}, author = {Stocking, M. L. and Lewis, C.} } @inbook {1931, title = {A minimax solution for sequential classification problems}, year = {2000}, note = {$\#$VO00101}, address = {H. A. L. Kiers, J.-P.Rasson, P. J. F. Groenen, and M. 
Schader (Eds.), Data analysis, classification, and related methods (pp. 121-126). Berlin: Springer. }, author = {Vos, H. J.} } @inbook {158, title = {MML and EAP estimation in testlet-based adaptive testing}, booktitle = {Computerized adaptive testing: Theory and practice}, year = {2000}, pages = {271-287}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dordrecht, The Netherlands}, author = {Glas, C. A. W. and Wainer, H., and Bradlow, E. T.} } @booklet {1546, title = {Modifications of the branch-and-bound algorithm for application in constrained adaptive testing (Research Report 00-05)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Veldkamp, B. P.} } @booklet {1549, title = {Multidimensional adaptive testing with constraints on test content (Research Report 00-11)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @booklet {1584, title = {Multiple stratification CAT designs with content control}, year = {2000}, address = {Unpublished manuscript}, author = {Yi, Q. and Chang, Hua-Hua} } @conference {994, title = {A new item selection procedure for mixed item type in computerized classification testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {2000}, note = {{PDF file, 452 KB}}, address = {New Orleans}, author = {Lau, C. and Wang, T.} } @article {603, title = {The null distribution of person-fit statistics for conventional and adaptive tests}, journal = {Applied Psychological Measurement}, volume = {23}, year = {2000}, pages = {327-345}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1532, title = {Optimal stratification of item pools in a-stratified computerized adaptive testing (Research Report 00-07)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. J.} } @article {317, title = {Overview of the computerized adaptive testing special section}, journal = {Psicol{\'o}gica}, volume = {21}, number = {1-2}, year = {2000}, pages = {115-120}, abstract = {This paper provides an overview of the five papers included in the Psicologica special section on computerized adaptive testing. A short introduction to this topic is presented as well. The main results, the links between the five papers and the general research topic to which they are more related are also shown. (PsycINFO Database Record (c) 2005 APA )}, keywords = {Adaptive Testing, Computers computerized adaptive testing}, author = {Ponsoda, V.} } @conference {877, title = {Performance of item exposure control methods in computerized adaptive testing: Further explorations}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {2000}, address = {New Orleans , LA}, author = {Chang, Hua-Hua and Chang, S. and Ansley} } @article {2089, title = {Practical issues in developing and maintaining a computerized adaptive testing program}, journal = {Psicologica}, volume = {21}, year = {2000}, pages = {135-155}, author = {Wise, S. L. and Kingsbury, G. 
G.} } @inbook {1886, title = {Principles of multidimensional adaptive testing}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (Eds.), Computerized adaptive testing: Theory and practice (pp. 53-73). Norwell MA: Kluwer.}, author = {Segall, D. O.} } @article {732, title = {Psychological reactions to adaptive testing}, journal = {International Journal of Selection and Assessment}, volume = {8}, year = {2000}, pages = {7-15}, author = {Tonidandel, S. and Qui{\~n}ones, M. A.} } @article {653, title = {Psychometric and psychological effects of review on computerized fixed and adaptive tests}, journal = {Psicol{\'o}gica}, volume = {21}, year = {2000}, pages = {157-173}, author = {Olea, J. and Revuelta, J. and Xim{\'e}nez, M. C. and Abad, F. J.} } @article {548, title = {A real data simulation of computerized adaptive administration of the MMPI-A}, journal = {Computers in Human Behavior}, volume = {16}, year = {2000}, pages = {83-96}, author = {Forbey, J. D. and Handel, R. W. and Ben-Porath, Y. S.} } @article {133, title = {A real data simulation of computerized adaptive administration of the MMPI-A}, journal = {Computers in Human Behavior}, volume = {16}, number = {1}, year = {2000}, pages = {83-96}, abstract = {A real data simulation of computerized adaptive administration of the Minnesota Multiphasic Personality Inventory-Adolescent (MMPI-A) was conducted using item responses from three groups of participants. The first group included 196 adolescents (age range 14-18) tested at a midwestern residential treatment facility for adolescents. The second group was the normative sample used in the standardization of the MMPI-A (Butcher, Williams, Graham, Archer, Tellegen, Ben-Porath, \& Kaemmer, 1992. Minnesota Multiphasic Personality Inventory-Adolescent (MMPI-A): manual for administration, scoring, and interpretation. Minneapolis: University of Minnesota Press.). The third group was the clinical sample used in the validation of the MMPI-A (Williams \& Butcher, 1989. An MMPI study of adolescents: I. Empirical validation of the study{\textquoteright}s scales. Personality assessment, 1, 251-259.). The MMPI-A data for each group of participants were run through a modified version of the MMPI-2 adaptive testing computer program (Roper, Ben-Porath \& Butcher, 1995. Comparability and validity of computerized adaptive testing with the MMPI-2. Journal of Personality Assessment, 65, 358-371.). To determine the optimal amount of item savings, each group{\textquoteright}s MMPI-A item responses were used to simulate three different orderings of the items: (1) from least to most frequently endorsed in the keyed direction; (2) from least to most frequently endorsed in the keyed direction with the first 120 items rearranged into their booklet order; and (3) all items in booklet order. The mean number of items administered for each group was computed for both classification and full-scale elevations for T-score cut-off values of 60 and 65. Substantial item administration savings were achieved for all three groups, and the mean number of items saved ranged from 50 items (10.7\% of the administered items) to 123 items (26.4\% of the administered items), depending upon the T-score cut-off, classification method (i.e. classification only or full-scale elevation), and group. (C) 2000 Elsevier Science Ltd. All rights reserved.}, author = {Forbey, J. D. and Handel, R. W. and Ben-Porath, Y. 
S.} } @article {777, title = {Rescuing computerized testing by breaking Zipf{\textquoteright}s law}, journal = {Journal of Educational and Behavioral Statistics}, volume = {25}, year = {2000}, pages = {203-224}, author = {Wainer, H.} } @article {791, title = {Response to Hays et al and McHorney and Cohen: Practical implications of item response theory and computerized adaptive testing: A brief summary of ongoing studies of widely used headache impact scales}, journal = {Medical Care}, volume = {38}, year = {2000}, pages = {73-82}, author = {Ware, J. E., Jr. and Bjorner, J. B. and Kosinski, M.} } @article {356, title = {A review of CAT review}, journal = {Popular Measurement}, volume = {3}, number = {1}, year = {2000}, pages = {47-49}, abstract = {Studied the effects of answer review on results of a computerized adaptive test, the laboratory professional examination of the American Society of Clinical Pathologists. Results from 29,293 candidates show that candidates who changed answers were more likely to improve their scores. (SLD)}, author = {Sekula-Wacura, R.} } @booklet {1509, title = {A selection procedure for polytomous items in computerized adaptive testing (Measurement and Research Department Reports 2000-5)}, year = {2000}, address = {Arnhem, The Netherlands: Cito}, author = {van Rijn, P. W. and Theo Eggen and Hemker, B. T. and Sanders, P. F.} } @conference {999, title = {Solving complex constraints in a-stratified computerized adaptive testing designs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, note = {PDF file, 384 K}, address = {New Orleans, USA}, author = {Leung, C.-K. and Chang, Hua-Hua and Hau, K.-T.} } @conference {1137, title = {Some considerations for improving accuracy of estimation of item characteristic curves in online calibration of computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Samejima, F.} } @conference {888, title = {Specific information item selection for adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans}, author = {Davey, T. and Fan, M.} } @booklet {1506, title = {STAR Reading 2 Computer-Adaptive Reading Test and Database: Technical Manual}, year = {2000}, address = {Wisconsin Rapids, WI: Author}, author = {Renaissance-Learning-Inc.} } @conference {1085, title = {Sufficient simplicity or comprehensive complexity? A comparison of probabilistic and stratification methods of exposure control}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Parshall, C. G. and Kromrey, J. D. and Hogarty, K. Y.} } @article {413, title = {Taylor approximations to logistic IRT models and their use in adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {25}, number = {3}, year = {2000}, pages = {307-343}, abstract = {Taylor approximation can be used to generate a linear approximation to a logistic ICC and a linear ability estimator. For a specific situation it will be shown to result in a special case of a Robbins-Monro item selection procedure for adaptive testing. 
The linear estimator can be used for the situation of zero and perfect scores when maximum likelihood estimation fails to come up with a finite estimate. It is also possible to use this estimator to generate starting values for maximum likelihood and weighted likelihood estimation. Approximations to the expectation and variance of the linear estimator for a sequence of Robbins-Monro item selections can be determined analytically. }, keywords = {computerized adaptive testing}, author = {Veerkamp, W. J. J.} } @conference {963, title = {Test security and item exposure control for computer-based }, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {Chicago}, author = {Kalohn, J.} } @conference {936, title = {Test security and the development of computerized tests}, booktitle = {Paper presented at the National Council on Measurement in Education invited symposium: Maintaining test security in computerized programs{\textendash}Implications for practice}, year = {2000}, address = {New Orleans}, author = {Guo, F. and Way, W. D. and Reshetar, R.} } @inbook {1936, title = {Testlet response theory: An analog for the 3PL model useful in testlet-based adaptive testing}, year = {2000}, address = {W. J. van der Linden and C. A. W. Glas (Eds.), Computerized Adaptive Testing: Theory and Practice (pp. 245-270). Norwell MA: Kluwer.}, author = {Wainer, H. and Bradlow, E. T. and Du, Z.} } @inbook {1932, title = {Testlet-based adaptive mastery testing}, year = {2000}, address = {W. J. van der Linden (Ed.), Computerized adaptive testing: Theory and practice (pp. 289-309). Norwell MA: Kluwer.}, author = {Vos, H. J. and Glas, C. A. W.} } @booklet {1495, title = {Testlet-based Designs for Computer-Based Testing in a Certification and Licensure Setting}, year = {2000}, address = {Jersey City, NJ: AICPA Technical Report}, author = {Pitoniak, M. J.} } @inbook {1856, title = {Using Bayesian Networks in Computerized Adaptive Tests}, year = {2000}, address = {M. Ortega and J. Bravo (Eds.), Computers and Education in the 21st Century. Kluwer, pp. 217-228.}, author = {Millan, E. and Trella, M. and Perez-de-la-Cruz, J.-L. and Conejo, R.} } @conference {827, title = {Using constraints to develop and deliver adaptive tests}, booktitle = {Paper presented at the Computer-Assisted Testing Conference.}, year = {2000}, note = {{PDF file, 46 KB}}, author = {Abdullah, S. C. and Cooley, R. E.} } @booklet {1538, title = {Using response times to detect aberrant behavior in computerized adaptive testing (Research Report 00-09)}, year = {2000}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. J. and van Krimpen-Stoop, E. M. L. A.} } @booklet {1339, title = {Variations in mean response times for questions on the computer-adaptive GRE general test: Implications for fair assessment (GRE Board Professional Report No. 96-20P; Educational Testing Service Research Report 00-7)}, year = {2000}, note = {$\#$BR00-01}, address = {Princeton NJ: Educational Testing Service}, author = {Bridgeman, B. and Cline, F.} } @booklet {1531, title = {Adaptive testing with equated number-correct scoring (Research Report 99-02)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. 
J.} } @conference {1293, title = {Adjusting computer adaptive test starting points to conserve item pool}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, address = {Montreal, Canada}, author = {Zhu, D. and Fan, M.} } @conference {1244, title = {Adjusting "scores" from a CAT following successful item challenges}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, note = {$\#$WA99-01 {PDF file, 150 KB}}, address = {Montreal, Canada}, author = {Wang, T. and Yi, Q. and Ban, J. C. and Harris, D. J. and Hanson, B. A.} } @conference {1123, title = {Alternative item selection strategies for improving test security and pool usage in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Robin, F.} } @article {276, title = {Alternative methods for the detection of item preknowledge in computerized adaptive testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {59}, number = {7-B}, year = {1999}, pages = {3765}, keywords = {computerized adaptive testing}, author = {McLeod, Lori Davis} } @article {497, title = {a-stratified multistage computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {211-222}, author = {Chang, Hua-Hua and Ying, Z.} } @article {495, title = {a-stratified multistage computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {211{\textendash}222}, author = {Chang, Hua-Hua and Ying, Z.} } @article {59, title = {a-stratified multistage computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, number = {3}, year = {1999}, note = {Sage Publications, US}, pages = {211-222}, abstract = {For computerized adaptive tests (CAT) based on the three-parameter logistic model, it was found that administering items with low discrimination parameter (a) values early in the test and administering those with high a values later was advantageous; the skewness of item exposure distributions was reduced while efficiency was maintained in trait level estimation. Thus, a new multistage adaptive testing approach is proposed that factors a into the item selection process. In this approach, the items in the item bank are stratified into a number of levels based on their a values. The early stages of a test use items with lower a values and later stages use items with higher a values. At each stage, items are selected according to an optimization criterion from the corresponding level. Simulation studies were performed to compare a-stratified CATs with CATs based on the Sympson-Hetter method for controlling item exposure. Results indicated that this new strategy led to tests that were well-balanced, with respect to item exposure, and efficient. The a-stratified CATs achieved a lower average exposure rate than CATs based on Bayesian or information-based item selection and the Sympson-Hetter method. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Chang, Hua-Hua and Ying, Z.} } @conference {988, title = {Automated flawed item detection and graphical item used in on-line calibration of CAT-ASVAB. 
}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Krass, I. A. and Thomasson, G. L.} } @article {488, title = {A Bayesian random effects model for testlets }, journal = {Psychometrika}, volume = { 64}, year = {1999}, pages = {153-168}, author = {Bradlow, E. T. and Wainer, H., and Wang, X} } @article {204, title = {Benefits from computerized adaptive testing as seen in simulation studies}, journal = {European Journal of Psychological Assessment}, volume = {15}, number = {2}, year = {1999}, pages = {91-98}, author = {Hornke, L. F.} } @article {766, title = {Can examinees use a review option to obtain positively biased ability estimates on a computerized adaptive test? }, journal = {Journal of Educational Measurement}, volume = {36}, year = {1999}, pages = {141-157}, author = {Vispoel, W. P. and Rocklin, T. R. and Wang, T. and Bleiler, T.} } @conference {1179, title = {CAT administration of language placement exams}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Stahl, J. and Gershon, R. C. and Bergstrom, B.} } @inbook {26, title = {CAT for certification and licensure}, booktitle = {Innovations in computerized assessment}, year = {1999}, note = {Using Smart Source ParsingInnovations in computerized assessment. (pp. 67-91). xiv, 266pp}, pages = {67-91}, publisher = {Lawrence Erlbaum Associates}, organization = {Lawrence Erlbaum Associates}, address = {Mahwah, N.J.}, abstract = {(from the chapter) This chapter discusses implementing computerized adaptive testing (CAT) for high-stakes examinations that determine whether or not a particular candidate will be certified or licensed. The experience of several boards who have chosen to administer their licensure or certification examinations using the principles of CAT illustrates the process of moving into this mode of administration. Examples of the variety of options that can be utilized within a CAT administration are presented, the decisions that boards must make to implement CAT are discussed, and a timetable for completing the tasks that need to be accomplished is provided. In addition to the theoretical aspects of CAT, practical issues and problems are reviewed. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Bergstrom, Betty A. and Lunz, M. E.} } @conference {1090, title = {A comparative study of ability estimates from computer-adaptive testing and multi-stage testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal Canada}, author = {Patsula, L N. and Hambleton, R. K.} } @book {1691, title = {A comparison of computerized-adaptive testing and multi-stage testing}, year = {1999}, address = {Unpublished doctoral dissertation, University of Massachusetts at Amherst}, author = {Patsula, L N.} } @conference {978, title = {A comparison of conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$KI99-1}, address = {Montreal, Canada}, author = {Kingsbury, G. G. 
and A Zara} } @conference {1266, title = {Comparison of stratum scored and maximum likelihood scoring}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Wise, S. L.} } @booklet {1597, title = {A comparison of testlet-based test designs for computerized adaptive testing (LSAC Computerized Testing Report 97-01)}, year = {1999}, address = {Newtown, PA: LSAC.}, author = {Schnipke, D. L. and Reese, L. M.} } @conference {836, title = {Comparison of the a-stratified method, the Sympson-Hetter method, and the matched difficulty method in CAT administration}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1999}, note = {$\#$BA99-01}, address = {Lawrence KS}, author = {Ban, J and Wang, T. and Yi, Q.} } @article {28, title = {Competency gradient for child-parent centers}, journal = {Journal of Outcomes Measurement}, volume = {3}, number = {1}, year = {1999}, note = {1090-655X (Print)Journal ArticleResearch Support, U.S. Gov{\textquoteright}t, P.H.S.}, pages = {35-52}, abstract = {This report describes an implementation of the Rasch model during the longitudinal evaluation of a federally-funded early childhood preschool intervention program. An item bank is described for operationally defining a psychosocial construct called community life-skills competency, an expected teenage outcome of the preschool intervention. This analysis examined the position of teenage students on this scale structure, and investigated a pattern of cognitive operations necessary for students to pass community life-skills test items. Then this scale structure was correlated with nationally standardized reading and math achievement scores, teacher ratings, and school records to assess its validity as a measure of the community-related outcome goal for this intervention. The results show a functional relationship between years of early intervention and magnitude of effect on the life-skills competency variable.}, keywords = {*Models, Statistical, Activities of Daily Living/classification/psychology, Adolescent, Chicago, Child, Child, Preschool, Early Intervention (Education)/*statistics \& numerical data, Female, Follow-Up Studies, Humans, Male, Outcome and Process Assessment (Health Care)/*statistics \& numerical data}, author = {Bezruczko, N.} } @article {183, title = {Computerized adaptive assessment with the MMPI-2 in a clinical setting}, journal = {Psychological Assessment}, volume = {11}, number = {3}, year = {1999}, pages = {369-380}, author = {Handel, R. W. and Ben-Porath, Y. S. and Watt, M. E.} } @booklet {1631, title = {Computerized adaptive testing in the Bundeswehr}, year = {1999}, note = {$\#$ST99-01 {PDF file, 427 KB}}, address = {Unpublished manuscript}, author = {Storm, E. G.} } @article {280, title = {Computerized Adaptive Testing: Overview and Introduction}, journal = {Applied Psychological Measurement}, volume = {23}, number = {3}, year = {1999}, pages = {187-94}, abstract = {Use of computerized adaptive testing (CAT) has increased substantially since it was first formulated in the 1970s. This paper provides an overview of CAT and introduces the contributions to this Special Issue. The elements of CAT discussed here include item selection procedures, estimation of the latent trait, item exposure, measurement precision, and item bank development. Some topics for future research are also presented. 
}, keywords = {computerized adaptive testing}, author = {Meijer, R. R. and Nering, M. L.} } @article {642, title = {Computerized adaptive testing: Overview and introduction}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {187-194}, author = {Meijer, R. R. and Nering, M. L.} } @conference {993, title = {Computerized classification testing under practical constraints with a polytomous model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, note = {PDF file, 579 K}, address = {Montreal}, author = {Lau, C. A. and Wang, T.} } @proceedings {243, title = {Computerized classification testing under practical constraints with a polytomous model}, journal = {annual meeting of the American Educational Research Association}, year = {1999}, month = {04/1999}, address = {Montreal, Quebec, Canada}, author = {Lau, C. A. and Wang, T.} } @conference {1082, title = {Computerized testing {\textendash} Issues and applications (Mini-course manual)}, booktitle = {Annual Meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal}, author = {Parshall, C. and Davey, T. and Spray, J. and Kalohn, J.} } @conference {919, title = {Constructing adaptive tests to parallel conventional programs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$FA99-01}, address = {Montreal}, author = {Fan, M. and Thompson, T. and Davey, T.} } @inbook {1929, title = {Creating computerized adaptive tests of music aptitude: Problems, solutions, and future directions}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 151-176). Mahwah NJ: Erlbaum.}, author = {Vispoel, W. P.} } @booklet {1587, title = {Current and future research in multi-stage testing (Research Report No 370)}, year = {1999}, note = {{PDF file, 131 KB}}, address = {Amherst MA: University of Massachusetts, Laboratory of Psychometric and Evaluative Research.}, author = {Zenisky, A. L.} } @article {751, title = {CUSUM-based person-fit statistics for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {26}, year = {1999}, pages = {199-218}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1542, title = {CUSUM-based person-fit statistics for adaptive testing (Research Report 99-05)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @booklet {1548, title = {Designing item pools for computerized adaptive testing (Research Report 99-03)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Veldkamp, B. P. and van der Linden, W. J.} } @article {640, title = {Detecting item memorization in the CAT environment}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {147-160}, author = {McLeod, L. D. and Lewis, C.} } @conference {1063, title = {Detecting items that have been memorized in the CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {McLeod, L. D. and Schnipke, D. 
L.} } @inbook {1809, title = {Developing computerized adaptive tests for school children}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 93-115). Mahwah NJ: Erlbaum.}, author = {Kingsbury, G. G. and Houser, R. L.} } @conference {1260, title = {The development and cognitive evaluation of an audio-assisted computer-adaptive test for eighth-grade mathematics}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Williams, V. S. L.} } @inbook {1857, title = {Development and introduction of a computer adaptive Graduate Record Examination General Test}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.). Innovations in computerized assessment (pp. 117-135). Mahwah NJ: Erlbaum.}, author = {Mills, C. N.} } @inbook {1963, title = {The development of a computerized adaptive selection system for computer programmers in a financial services company}, year = {1999}, address = {F. Drasgow and J. B. Olson-Buchanan (Eds.), Innovations in computerized assessment (pp. 7-33). Mahwah NJ: Erlbaum.}, author = {Zickar, M. J. and Overton, R. C. and Taylor, L. R. and Harms, H. J.} } @article {606, title = {The development of an adaptive test for placement in French}, journal = {Studies in Language Testing}, volume = {10}, year = {1999}, pages = {122-135}, author = {Laurier, M.} } @inbook {1889, title = {Development of the computerized adaptive testing version of the Armed Services Vocational Aptitude Battery}, year = {1999}, address = {F. Drasgow and J. Olson-Buchanan (Eds.). Innovations in computerized assessment. Mahwah NJ: Erlbaum.}, author = {Segall, D. O. and Moreno, K. E.} } @article {790, title = {Dynamic health assessments: The search for more practical and more precise outcomes measures}, journal = {Quality of Life Newsletter}, year = {1999}, note = {{PDF file, 75 KB}}, pages = {11-13}, author = {Ware, J. E., Jr. and Bjorner, J. B. and Kosinski, M.} } @article {220, title = {The effect of model misspecification on classification decisions made using a computerized test}, journal = {Journal of Educational Measurement}, volume = {36}, number = {1}, year = {1999}, note = {National Council on Measurement in Education, US}, pages = {47-59}, abstract = {Many computerized testing algorithms require the fitting of some item response theory (IRT) model to examinees{\textquoteright} responses to facilitate item selection, the determination of test stopping rules, and classification decisions. Some IRT models are thought to be particularly useful for small volume certification programs that wish to make the transition to computerized adaptive testing (CAT). The 1-parameter logistic model (1-PLM) is usually assumed to require a smaller sample size than the 3-parameter logistic model (3-PLM) for item parameter calibrations. This study examined the effects of model misspecification on the precision of the decisions made using the sequential probability ratio test. For this comparison, the 1-PLM was used to estimate item parameters, even though the items{\textquoteright} characteristics were represented by a 3-PLM. Results demonstrate that the 1-PLM produced considerably more decision errors under simulation conditions similar to a real testing environment, compared to the true model and to a fixed-form standard reference set of items. 
(PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Kalohn, J. C. and Spray, J. A.} } @article {665, title = {The effects of test difficulty manipulation in computerized adaptive testing and self-adapted testing}, journal = {Applied Measurement in Education}, volume = {12}, year = {1999}, pages = {167-184}, author = {Ponsoda, V. and Olea, J. and Rodriguez, M. S. and Revuelta, J.} } @article {740, title = {Empirical initialization of the trait estimator in adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, note = {[Error correction in 23, 248]}, pages = {21-29}, author = {van der Linden, W. J.} } @conference {996, title = {An enhanced stratified computerized adaptive testing design}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, note = {{PDF file, 478 KB}}, address = {Montreal, Canada}, author = {Leung, C.-K. and Chang, Hua-Hua and Hau, K.-T.} } @article {234, title = {Evaluating the usefulness of computerized adaptive testing for medical in-course assessment}, journal = {Academic Medicine}, volume = {74}, number = {10}, year = {1999}, note = {Kreiter, C. D., Ferguson, K., Gruppen, L. D. United States. Academic Medicine: Journal of the Association of American Medical Colleges. Acad Med. 1999 Oct;74(10):1125-8.}, month = {Oct}, pages = {1125-8}, edition = {1999/10/28}, abstract = {PURPOSE: This study investigated the feasibility of converting an existing computer-administered, in-course internal medicine test to an adaptive format. METHOD: A 200-item internal medicine extended matching test was used for this research. Parameters were estimated with commercially available software with responses from 621 examinees. A specially developed simulation program was used to retrospectively estimate the efficiency of the computer-adaptive exam format. RESULTS: It was found that the average test length could be shortened by almost half with measurement precision approximately equal to that of the full 200-item paper-and-pencil test. However, computer-adaptive testing with this item bank provided little advantage for examinees at the upper end of the ability continuum. An examination of classical item statistics and IRT item statistics suggested that adding more difficult items might extend the advantage to this group of examinees. CONCLUSIONS: Medical item banks presently used for in-course assessment might be advantageously employed in adaptive testing. However, it is important to evaluate the match between the items and the measurement objective of the test before implementing this format.}, keywords = {*Automation, *Education, Medical, Undergraduate, Educational Measurement/*methods, Humans, Internal Medicine/*education, Likelihood Functions, Psychometrics/*methods, Reproducibility of Results}, isbn = {1040-2446 (Print)}, author = {Kreiter, C. D. and Ferguson, K. and Gruppen, L. D.} } @conference {1233, title = {An examination of conditioning variables in DIF analysis in a computer adaptive testing environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Walker, C. M. and Ackerman, T. 
A.} } @article {808, title = {Examinee judgments of changes in item difficulty: Implications for item review in computerized adaptive testing}, journal = {Applied Measurement in Education}, volume = {12}, year = {1999}, pages = {185-198}, author = {Wise, S. L. and Finney, S. J. and Enders, C. K. and Freeman, S. A. and Severance, D. D.} } @booklet {1349, title = {Exploring the relationship between item exposure rate and test overlap rate in computerized adaptive testing (ACT Research Report series 99-5)}, year = {1999}, note = {(also National Council on Measurement in Education paper, 1999).}, address = {Iowa City IA: ACT, Inc}, author = {Chen, S-Y. and Ankenmann, R. D. and Spray, J. A.} } @booklet {1348, title = {Exploring the relationship between item exposure rate and test overlap rate in computerized adaptive testing}, year = {1999}, note = {(Also ACT Research Report 99-5). (Also presented at American Educational Research Association, 1999)}, address = {Paper presented at the annual meeting of the National Council on Measurement in Education, Montreal, Canada}, author = {Chen, S. and Ankenmann, R. D. and Spray, J. A.} } @conference {924, title = {Fairness in computer-based testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {Fairness in computer-based testing. Paper presented at the annual meeting of the National Council on Measurement in Education, Montreal, Canada.}, address = {Montreal, Canada}, author = {Gallagher, A. and Bridgeman, B. and Calahan, C.} } @conference {1004, title = {Formula score and direct optimization algorithms in CAT ASVAB on-line calibration}, booktitle = {Paper presented at the annual meeting of the *?*.}, year = {1999}, author = {Levine, M. V. and Krass, I. A.} } @article {539, title = {Generating items during testing: Psychometric issues and models}, journal = {Psychometrika}, volume = {64}, year = {1999}, pages = {407-433}, author = {Embretson, S. E.} } @article {6, title = {Graphical models and computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, number = {3}, year = {1999}, pages = {223-37}, abstract = {Considers computerized adaptive testing from the perspective of graphical modeling (GM). GM provides methods for making inferences about multifaceted skills and knowledge and for extracting data from complex performances. Provides examples from language-proficiency assessment. (SLD)}, keywords = {computerized adaptive testing}, author = {Almond, R. G. and Mislevy, R. J.} } @inbook {1921, title = {Het ontwerpen van adaptieve examens [Designing adaptive tests]}, year = {1999}, note = {[In Dutch]}, address = {J. M. Pieters, Tj. Plomp, and L. E. Odenthal (Eds.), Twintig jaar Toegepaste Onderwijskunde: Een kaleidoscopisch overzicht van Twents onderwijskundig onderzoek (pp. 249-267). Enschede: Twente University Press.}, author = {van der Linden, W. J.} } @conference {1020, title = {Impact of flawed items on ability estimation in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$LI99-01}, address = {Montreal, Canada}, author = {Liu, M. 
and Steffen, M.} } @conference {1075, title = {Implications from information functions and standard errors for determining preferred normed scales for CAT and P and P ASVAB}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Nicewander, W. A. and Thomasson, G. L.} } @article {327, title = {Incorporating content constraints into a multi-stage adaptive testlet design.}, number = {BBB16784}, year = {1999}, institution = {Law School Admission Council}, address = {Princeton, NJ. USA}, abstract = {Most large-scale testing programs facing computerized adaptive testing (CAT) must face the challenge of maintaining extensive content requirements, but content constraints in computerized adaptive testing (CAT) can compromise the precision and efficiency that could be achieved by a pure maximum information adaptive testing algorithm. This simulation study first evaluated whether realistic content constraints could be met by carefully assembling testlets and appropriately selecting testlets for each test taker that, when combined, would meet the content requirements of the test and would be adapted to the test takers ability level. The second focus of the study was to compare the precision of the content-balanced testlet design with that achieved by the current paper-and-pencil version of the test through data simulation. The results reveal that constraints to control for item exposure, testlet overlap, and efficient pool utilization need to be incorporated into the testlet assembly algorithm. More refinement of the statistical constraints for testlet assembly is also necessary. However, even for this preliminary attempt at assembling content-balanced testlets, the two-stage computerized test simulated with these testlets performed quite well. (Contains 5 figures, 5 tables, and 12 references.) (Author/SLD)}, isbn = {Series}, author = {Reese, L. M. and Schnipke, D. L. and Luebke, S. W.} } @book {110, title = {Innovations in computerized assessment}, year = {1999}, note = {EDRS Availability: None. Lawrence Erlbaum Associates, Inc., Publishers, 10 Industrial Avenue, Mahwah, New Jersey 07430-2262 (paperback: ISBN-0-8058-2877-X, $29.95; clothbound: ISBN-0-8058-2876-1, $59.95). Tel: 800-926-6579 (Toll Free).}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J.}, abstract = {Chapters in this book present the challenges and dilemmas faced by researchers as they created new computerized assessments, focusing on issues addressed in developing, scoring, and administering the assessments. Chapters are: (1) "Beyond Bells and Whistles; An Introduction to Computerized Assessment" (Julie B. Olson-Buchanan and Fritz Drasgow); (2) "The Development of a Computerized Selection System for Computer Programmers in a Financial Services Company" (Michael J. Zickar, Randall C. Overton, L. Rogers Taylor, and Harvey J. Harms); (3) "Development of the Computerized Adaptive Testing Version of the Armed Services Vocational Aptitude Battery" (Daniel O. Segall and Kathleen E. Moreno); (4) "CAT for Certification and Licensure" (Betty A. Bergstrom and Mary E. Lunz); (5) "Developing Computerized Adaptive Tests for School Children" (G. Gage Kingsbury and Ronald L. Houser); (6) "Development and Introduction of a Computer Adaptive Graduate Record Examinations General Test" (Craig N. 
Mills); (7) "Computer Assessment Using Visual Stimuli: A Test of Dermatological Skin Disorders" (Terry A. Ackerman, John Evans, Kwang-Seon Park, Claudia Tamassia, and Ronna Turner); (8) "Creating Computerized Adaptive Tests of Music Aptitude: Problems, Solutions, and Future Directions" (Walter P. Vispoel); (9) "Development of an Interactive Video Assessment: Trials and Tribulations" (Fritz Drasgow, Julie B. Olson-Buchanan, and Philip J. Moberg); (10) "Computerized Assessment of Skill for a Highly Technical Job" (Mary Ann Hanson, Walter C. Borman, Henry J. Mogilka, Carol Manning, and Jerry W. Hedge); (11) "Easing the Implementation of Behavioral Testing through Computerization" (Wayne A. Burroughs, Janet Murray, S. Scott Wesley, Debra R. Medina, Stacy L. Penn, Steven R. Gordon, and Michael Catello); and (12) "Blood, Sweat, and Tears: Some Final Comments on Computerized Assessment." (Fritz Drasgow and Julie B. Olson-Buchanan). Each chapter contains references. (Contains 17 tables and 21 figures.) (SLD)}, keywords = {computerized adaptive testing}, author = {F Drasgow and Olson-Buchanan, J. B.} } @inbook {1775, title = {Item calibration and parameter drift}, year = {1999}, address = {W. J. van der Linden and C. A. W. Glas (Eds.), Computer adaptive testing: Theory and practice. Norwell MA: Kluwer.}, author = {Glas, C. A. W. and Veerkamp, W. J. J.} } @conference {1083, title = {Item exposure in adaptive tests: An empirical investigation of control strategies}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1999}, address = {Lawrence KS}, author = {Parshall, C. and Hogarty, K. and Kromrey, J.} } @booklet {1410, title = {Item nonresponse: Occurrence, causes and imputation of missing answers to test items}, year = {1999}, address = {(M and T Series No 32). Leiden: DSWO Press}, author = {Huisman, J. M. E.} } @article {536, title = {Item selection in adaptive testing with the sequential probability ratio test}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, note = {[Reprinted as Chapter 6 in $\#$EG04-01]}, pages = {249-261}, author = {Theo Eggen} } @conference {997, title = {Item selection in computerized adaptive testing: improving the a-stratified design with the Sympson-Hetter algorithm}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {1999}, address = {Montreal, CA}, author = {Leung, C-K.. and Chang, Hua-Hua and Hau, K-T.} } @conference {1227, title = {Limiting answer review and change on computerized adaptive vocabulary tests: Psychometric and attitudinal results}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, note = {$\#$VI99-01}, address = {Montreal, Canada}, author = {Vispoel, W. P. and Hendrickson, A. and Bleiler, T. and Widiatmo, H. and Shrairi, S. and Ihrig, D.} } @conference {933, title = {Managing CAT item development in the face of uncertainty}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Guo, F.} } @booklet {1552, title = {A minimax procedure in the context of sequential mastery testing (Research Report 99-04)}, year = {1999}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Vos, H. 
J.} } @conference {1169, title = {More efficient use of item inventories}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Smith, R. and Zhu, R.} } @article {395, title = {Multidimensional adaptive testing with a minimum error-variance criterion}, journal = {Journal of Educational and Behavioral Statistics}, volume = {24}, number = {4}, year = {1999}, pages = {398-412}, abstract = {Adaptive testing under a multidimensional logistic response model is addressed. An algorithm is proposed that minimizes the (asymptotic) variance of the maximum-likelihood estimator of a linear combination of abilities of interest. The criterion results in a closed-form expression that is easy to evaluate. In addition, it is shown how the algorithm can be modified if the interest is in a test with a "simple ability structure". The statistical properties of the adaptive ML estimator are demonstrated for a two-dimensional item pool with several linear combinations of the abilities. }, keywords = {computerized adaptive testing}, author = {van der Linden, W. J.} } @article {752, title = {The null distribution of person-fit statistics for conventional and adaptive tests}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {327-345}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @conference {840, title = {On-the-fly adaptive tests: An application of generative modeling to quantitative reasoning}, booktitle = {Symposium presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Bejar, I. I.} } @article {45, title = {Optimal design for item calibration in computerized adaptive testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {59}, number = {8-B}, year = {1999}, pages = {4220}, abstract = {Item Response Theory is the psychometric model used for standardized tests such as the Graduate Record Examination. A test-taker{\textquoteright}s response to an item is modelled as a binary response with success probability depending on parameters for both the test-taker and the item. Two popular models are the two-parameter logistic (2PL) model and the three-parameter logistic (3PL) model. For the 2PL model, the logit of the probability of a correct response equals $a_i(\theta_j - b_i)$, where $a_i$ and $b_i$ are item parameters, while $\theta_j$ is the test-taker{\textquoteright}s parameter, known as "proficiency." The 3PL model adds a nonzero left asymptote to model random response behavior by low-$\theta$ test-takers. Assigning scores to students requires accurate estimation of the $\theta$s, while accurate estimation of the $\theta$s requires accurate estimation of the item parameters. The operational implementation of Item Response Theory, particularly following the advent of computerized adaptive testing, generally involves handling these two estimation problems separately. This dissertation addresses the optimal design for item parameter estimation. Most current designs calibrate items with a sample drawn from the overall test-taking population. For 2PL models a sequential design based on the D-optimality criterion has been proposed, while no 3PL design is in the literature. In this dissertation, we design the calibration with the ultimate use of the items in mind, namely to estimate test-takers{\textquoteright} proficiency parameters. 
For both the 2PL and 3PL models, this criterion leads to a locally L-optimal design criterion, named the Minimal Information Loss criterion. In turn, this criterion and the General Equivalence Theorem give a two point design for the 2PL model and a three point design for the 3PL model. A sequential implementation of this optimal design is presented. For the 2PL model, this design is almost 55\% more efficient than the simple random sample approach, and 12\% more efficient than the locally D-optimal design. For the 3PL model, the proposed design is 34\% more efficient than the simple random sample approach. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Buyske, S. G.} } @conference {1087, title = {Performance of the Sympson-Hetter exposure control algorithm with a polytomous item bank}, booktitle = {Paper presented at the annual meeting of American Educational Research Association}, year = {1999}, address = {Montreal, Canada}, author = {Pastor, D. A. and Chiang, C. and Dodd, B. G. and Yockey, R. and} } @book {1715, title = {The precision of ability estimation methods for computerized adaptive testing using the generalized partial credit model}, year = {1999}, address = {Unpublished doctoral dissertation, University of Pittsburgh}, author = {Wang, S} } @conference {1237, title = {Precision of Warm{\textquoteright}s weighted likelihood estimation of ability for a polytomous model in CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1999}, note = {$\#$WA99-02 {PDF file, 604 KB}}, address = {Montreal}, author = {Wang, S and Wang, T.} } @conference {893, title = {Pretesting alongside an operational CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Davey, T. and Pommerich, M and Thompson, D. T.} } @conference {1066, title = {Principles for administering adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal Canada}, author = {Miller, T. and Davey, T.} } @conference {979, title = {A procedure to compare conventional and adaptive testing procedures for making single-point decisions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Kingsbury, G. G. and A Zara} } @conference {1267, title = {The rationale and principles of stratum scoring}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Wise, S. L.} } @article {786, title = {Reducing bias in CAT trait estimation: A comparison of approaches}, journal = {Applied Psychological Measurement}, volume = {23}, year = {1999}, pages = {263-278}, author = {Wang, T. and Hanson, B. H. and C.-M. H. Lau} } @conference {949, title = {Reducing item exposure without reducing precision (much) in computerized adaptive testing}, booktitle = { Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, CA}, author = {Holmes, R. M. and Segall, D. O.} } @inbook {1758, title = {Research and development of a computer-adaptive test of listening comprehension in the less-commonly taught language Hausa}, year = {1999}, address = {M. 
Chalhoub-Deville (Ed.). Issues in computer-adaptive testing of reading proficiency. Cambridge, UK : Cambridge University Press.}, author = {Dunkel, P.} } @conference {1144, title = {Response time feedback on computer-administered tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Scrams, D. J. and Schnipke, D. L.} } @book {1726, title = {The robustness of the unidimensional 3PL IRT model when applied to two-dimensional data in computerized adaptive testing}, year = {1999}, note = {$\#$ZH99-1}, address = {Unpublished Ph.D. dissertation, State University of New York at Albany}, author = {Zhao, J. C.} } @booklet {1557, title = {Some relationship among issues in CAT item pool management}, year = {1999}, note = {$\#$WA99-03}, author = {Wang, T.} } @article {294, title = {Some reliability estimates for computerized adaptive tests}, journal = {Applied Psychological Measurement}, volume = {23}, number = {3}, year = {1999}, pages = {239-47}, abstract = {Three reliability estimates are derived for the Bayes modal estimate (BME) and the maximum likelihood estimate (MLE) of θ in computerized adaptive tests (CAT). Each reliability estimate is a function of test information. Two of the estimates are shown to be upper bounds to true reliability. The three reliability estimates and the true reliabilities of both MLE and BME were computed for seven simulated CATs. Results showed that the true reliabilities for MLE and BME were nearly identical in all seven tests. The three reliability estimates never differed from the true reliabilities by more than .02 (.01 in most cases). A simple implementation of one reliability estimate was found to accurately estimate reliability in CATs. }, author = {Nicewander, W. A. and Thomasson, G. L.} } @conference {971, title = {Standard errors of proficiency estimates in stratum scored CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Kingsbury, G. G.} } @conference {956, title = {Study of methods to detect aberrant response patterns in computerized testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Iwamoto, C. K. and Nungester, R. J. and Luecht, RM} } @booklet {1496, title = {Test anxiety and test performance: Comparing paper-based and computer-adaptive versions of the GRE General Test (Research Report 99-15)}, year = {1999}, address = {Princeton NJ: Educational Testing Service}, author = {Powers, D. E.} } @inbook {1820, title = {Testing adaptatif et {\'e}valuation des processus cognitifs [Adaptive testing and the assessment of cognitive processes]}, year = {1999}, address = {C. Depover and B. No{\"e}l ({\'E}ds) : L{\textquoteright}{\'e}valuation des comp{\'e}tences et des processus cognitifs - Mod{\`e}les, pratiques et contextes. Bruxelles : De Boeck Universit{\'e}.}, author = {Laurier, M.} } @booklet {1487, title = {Tests informatizados: Fundamentos y aplicaciones (Computerized testing: Fundamentals and applications)}, year = {1999}, note = {[In Spanish]}, address = {Madrid: Pir{\'a}mide.}, author = {Olea, J. and Ponsoda, V. 
and Prieto, G., Eds.} } @conference {1181, title = {Test-taking strategies}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Steffen, M.} } @conference {1183, title = {Test-taking strategies in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Steffen, M. and Way, W. D.} } @article {230, title = {Threats to score comparability with applications to performance assessments and computerized adaptive tests}, journal = {Educational Assessment}, volume = {6}, number = {2}, year = {1999}, pages = {73-96}, abstract = {Develops a conceptual framework that addresses score comparability for performance assessments, adaptive tests, paper-and-pencil tests, and alternate item pools for computerized tests. Outlines testing situation aspects that might threaten score comparability and describes procedures for evaluating the degree of score comparability. Suggests ways to minimize threats to comparability. (SLD)}, author = {Kolen, M. J.} } @article {599, title = {Threats to score comparability with applications to performance assessments and computerized adaptive tests}, journal = {Educational Assessment}, volume = {6}, year = {1999}, pages = {73-96}, author = {Kolen, M. J.} } @conference {828, title = {Use of conditional item exposure methodology for an operational CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Anderson, D.} } @conference {866, title = {The use of linear-on-the-fly testing for TOEFL Reading}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1999}, address = {Montreal, Canada}, author = {Carey, P. A.} } @article {419, title = {The use of Rasch analysis to produce scale-free measurement of functional ability}, journal = {American Journal of Occupational Therapy}, volume = {53}, number = {1}, year = {1999}, note = {991250470272-9490Journal Article}, pages = {83-90}, abstract = {Innovative applications of Rasch analysis can lead to solutions for traditional measurement problems and can produce new assessment applications in occupational therapy and health care practice. First, Rasch analysis is a mechanism that translates scores across similar functional ability assessments, thus enabling the comparison of functional ability outcomes measured by different instruments. This will allow for the meaningful tracking of functional ability outcomes across the continuum of care. Second, once the item-difficulty order of an instrument or item bank is established by Rasch analysis, computerized adaptive testing can be used to target items to the patient{\textquoteright}s ability level, reducing assessment length by as much as one half. More importantly, Rasch analysis can provide the foundation for "equiprecise" measurement or the potential to have precise measurement across all levels of functional ability. 
The use of Rasch analysis to create scale-free measurement of functional ability demonstrates how this methodlogy can be used in practical applications of clinical and outcome assessment.}, keywords = {*Activities of Daily Living, Disabled Persons/*classification, Human, Occupational Therapy/*methods, Predictive Value of Tests, Questionnaires/standards, Sensitivity and Specificity}, author = {Velozo, C. A. and Kielhofner, G. and Lai, J-S.} } @article {772, title = {Using Bayesian decision theory to design a computerized mastery test}, journal = {Journal of Educational and Behavioral Statistics}, volume = {24(3)}, year = {1999}, pages = {271{\textendash}292}, author = {Vos, H. J.} } @article {406, title = {Using response-time constraints to control for differential speededness in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {23}, number = {3}, year = {1999}, note = {Sage Publications, US}, pages = {195-210}, abstract = {An item-selection algorithm is proposed for neutralizing the differential effects of time limits on computerized adaptive test scores. The method is based on a statistical model for distributions of examinees{\textquoteright} response times on items in a bank that is updated each time an item is administered. Predictions from the model are used as constraints in a 0-1 linear programming model for constrained adaptive testing that maximizes the accuracy of the trait estimator. The method is demonstrated empirically using an item bank from the Armed Services Vocational Aptitude Battery. }, keywords = {computerized adaptive testing}, author = {van der Linden, W. J. and Scrams, D. J. and Schnipke, D. L.} } @booklet {1350, title = {WISCAT: Een computergestuurd toetspakket voor rekenen en wiskunde [A computerized test package for arithmetic and mathematics]}, year = {1999}, address = {Cito: Arnhem, The Netherlands}, author = {Cito.} } @booklet {1386, title = {Adaptive mastery testing using the Rasch model and Bayesian sequential decision theory (Research Report 98-15)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Glas, C. A. W. and Vos, H. J.} } @conference {1276, title = {Adaptive testing without IRT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, note = {(ERIC No. ED422359)}, address = {San Diego CA}, author = {Yan, D. and Lewis, C. and Stocking, M.} } @conference {908, title = {Alternatives for scoring computerized adaptive tests}, booktitle = {Paper presented at an Educational Testing Service-sponsored colloquium entitled Computer-based testing: Building the foundations for future assessments}, year = {1998}, address = {Philadelphia PA}, author = {Dodd, B. G. and Fitzpatrick, S. J.} } @inbook {105, title = {Alternatives for scoring computerized adaptive tests}, booktitle = {Computer-based testing}, year = {1998}, publisher = {Lawrence Erlbaum Associates, Inc.}, organization = {Lawrence Erlbaum Associates, Inc.}, address = {Mahwah, N.J., USA}, author = {Dodd, B. G. and Fitzpatrick, S. J.}, editor = {J. J. Fremer and W. C. Ward} } @conference {1180, title = {Application of an IRT ideal point model to computer adaptive assessment of job performance}, booktitle = {Paper presented at the annual meeting of the Society for Industrial and Organization Psychology}, year = {1998}, address = {Dallas TX}, author = {Stark, S. 
and F Drasgow} } @conference {985, title = {Application of direct optimization for on-line calibration in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, note = {{PDF file, 146 KB}}, address = {San Diego CA}, author = {Krass, I. A.} } @book {1659, title = {Applications of network flows to computerized adaptive testing}, year = {1998}, address = {Dissertation, Rutgers Center for Operations Research (RUTCOR), Rutgers University, New Brunswick NJ}, author = {Cordova, M. J.} } @article {81, title = {Applications of network flows to computerized adaptive testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {59}, number = {2-B}, year = {1998}, pages = {0855}, abstract = {Recently, the concept of Computerized Adaptive Testing (CAT) has been receiving ever growing attention from the academic community. This is so because of both practical and theoretical considerations. Its practical importance lies in the advantages of CAT over the traditional (perhaps outdated) paper-and-pencil test in terms of time, accuracy, and money. The theoretical interest is sparked by its natural relationship to Item Response Theory (IRT). This dissertation offers a mathematical programming approach which creates a model that generates a CAT that takes care of many questions concerning the test, such as feasibility, accuracy and time of testing, as well as item pool security. The CAT generated is designed to obtain the most information about a single test taker. Several methods for estimating the examinee{\textquoteright}s ability, based on the (dichotomous) responses to the items in the test, are also offered here. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Claudio, M. J. C.} } @conference {1061, title = {A Bayesian approach to detection of item preknowledge in a CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {McLeod, L. D. and Lewis, C.} } @article {40, title = {Bayesian identification of outliers in computerized adaptive testing}, journal = {Journal of the American Statistical Association}, volume = {93}, number = {443}, year = {1998}, pages = {910-919}, abstract = {We consider the problem of identifying examinees with aberrant response patterns in a computerized adaptive test (CAT). The vector of responses yi of person i from the CAT comprises a multivariate response vector. Multivariate observations may be outlying in many different directions, and we characterize specific directions as corresponding to outliers with different interpretations. We develop a class of outlier statistics to identify different types of outliers based on a control chart type methodology. The outlier methodology is adaptable to general longitudinal discrete data structures. We consider several procedures to judge how extreme a particular outlier is. Data from the National Council Licensure Examination (NCLEX) motivate our development and are used to illustrate the results.}, author = {Bradlow, E. T. and Weiss, R. E. and Cho, M.} } @article {737, title = {Bayesian item selection criteria for adaptive testing}, journal = {Psychometrika}, volume = {63}, year = {1998}, pages = {201-216}, author = {van der Linden, W.
J.} } @booklet {1535, title = {Capitalization on item calibration error in adaptive testing (Research Report 98-07)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. J. and Glas, C. A. W.} } @booklet {1453, title = {CASTISEL [Computer software]}, year = {1998}, address = {Philadelphia, PA: National Board of Medical Examiners}, author = {Luecht, RM} } @conference {954, title = {CAT item calibration}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego}, author = {Hsu, Y. and Thompson, T.D. and Chen, W-H.} } @conference {1200, title = {CAT Item exposure control: New evaluation tools, alternate methods and integration into a total CAT program}, booktitle = {Paper presented at the annual meeting of the National Council of Measurement in Education}, year = {1998}, address = {San Diego, CA}, author = {Thomasson, G. L.} } @article {348, title = {Comparability of paper-and-pencil and computer adaptive test scores on the GRE General Test}, number = {ETS Research Report 98-38}, year = {1998}, month = {August, 1998}, institution = {Educational Testing Services}, address = {Princeton, N.J.}, isbn = {ETS Research Report 98-38}, author = {Schaeffer, G. A. and Bridgeman, B. and Golub-Smith, M. L. and Lewis, C. and Potenza, M. T. and Steffen, M.} } @booklet {1633, title = {Comparability of paper-and-pencil and computer adaptive test scores on the GRE General Test (GRE Board Professional Report No 95-08P; Educational Testing Service Research Report 98-38)}, year = {1998}, address = {Princeton, NJ: Educational Testing Service}, author = {Schaeffer, G. and Bridgeman, B. and Golub-Smith, M. L. and Lewis, C. and Potenza, M. T. and Steffen, M.} } @book {1654, title = {A comparative study of item exposure control methods in computerized adaptive testing}, year = {1998}, address = {Unpublished doctoral dissertation, University of Iowa , Iowa City IA}, author = {Chang, S-W.} } @booklet {1347, title = {A comparative study of item exposure control methods in computerized adaptive testing}, year = {1998}, note = {$\#$CH98-03}, address = {Research Report Series 98-3, Iowa City: American College Testing.}, author = {Chang, S-W. and Twu, B.-Y.} } @conference {992, title = {Comparing and combining dichotomous and polytomous items with SPRT procedure in computerized classification testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1998}, note = {PDF file, 375 K}, address = {San Diego}, author = {Lau, CA and Wang, T.} } @article {679, title = {A comparison of item exposure control methods in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {35}, year = {1998}, pages = {311-327}, author = {Revuelta, J., and Ponsoda, V.} } @article {500, title = {A comparison of maximum likelihood estimation and expected a posteriori estimation in CAT using the partial credit model}, journal = {Educational and Psychological Measurement}, volume = {58}, year = {1998}, pages = {569-595}, author = {Chen, S. and Hou, L. and Dodd, B. G.} } @conference {1195, title = {A comparison of two methods of controlling item exposure in computerized adaptive testing}, booktitle = {Paper presented at the meeting of the American Educational Research Association. San Diego CA.}, year = {1998}, author = {Tang, L. 
and Jiang, H. and Chang, Hua-Hua} } @booklet {1313, title = {Computer adaptive testing {\textendash} Approaches for item selection and measurement}, number = {(Research report)}, year = {1998}, address = {Rutgers Center for Operations Research, New Brunswick NJ}, author = {Armstrong, R. D. and Jones, D. H.} } @article {625, title = {Computer-assisted test assembly using optimization heuristics}, journal = {Applied Psychological Measurement}, volume = {22}, year = {1998}, pages = {224-236}, author = {Luecht, RM} } @conference {864, title = {Computerized adaptive rating scales that measure contextual performance}, booktitle = {Paper presented at the 13th annual conference of the Society for Industrial and Organizational Psychology}, year = {1998}, address = {Dallas TX}, author = {Borman, W. C. and Hanson, M. A. and Motowidlo, S. J. and F Drasgow and Foster, L. and Kubisiak, U. C.} } @article {377, title = {Computerized adaptive testing: What it is and how it works}, journal = {Educational Technology}, volume = {38}, number = {1}, year = {1998}, pages = {45-52}, abstract = {Describes the workings of computerized adaptive testing (CAT). Focuses on the key concept of information and then discusses two important components of a CAT system: the calibrated item bank and the testing algorithm. Describes a CAT that was designed for making placement decisions on the basis of two typical test administrations and notes the most significant differences between traditional paper-based testing and CAT. (AEF)}, author = {Straetmans, G. J. J. M. and Theo Eggen} } @conference {831, title = {Computerized adaptive testing with multiple form structures}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Armstrong, R. D. and Jones, D. H. and Berliner, N.} } @conference {1205, title = {Constructing adaptive tests to parallel conventional programs}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego}, author = {Thompson, T. and Davey, T. and Nering, M. L.} } @conference {1206, title = {Constructing passage-based tests that parallel conventional programs}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Thompson, T. and Davey, T. and Nering, M. L.} } @conference {890, title = {Controlling item exposure and maintaining item security}, booktitle = {Paper presented at an Educational Testing Service-sponsored colloquium entitled {\textquotedblleft}Computer-based testing: Building the foundations for future assessments{\textquotedblright}}, year = {1998}, address = {Philadelphia PA}, author = {Davey, T. and Nering, M. L.} } @article {372, title = {Controlling item exposure conditional on ability in computerized adaptive testing}, journal = {Journal of Educational \& Behavioral Statistics}, volume = {23}, number = {1}, year = {1998}, note = {American Educational Research Assn, US}, pages = {57-75}, abstract = {The interest in the application of large-scale adaptive testing for secure tests has served to focus attention on issues that arise when theoretical advances are made operational. One such issue is that of ensuring item and pool security in the continuous testing environment made possible by the computerized administration of a test, as opposed to the more periodic testing environment typically used for linear paper-and-pencil tests.
This article presents a new method of controlling the exposure rate of items conditional on ability level in this continuous testing environment. The properties of such conditional control on the exposure rates of items, when used in conjunction with a particular adaptive testing algorithm, are explored through studies with simulated data. }, author = {Stocking, M. L. and Lewis, C.} } @conference {1250, title = {Developing, maintaining, and renewing the item inventory to support computer-based testing}, booktitle = {Paper presented at the colloquium}, year = {1998}, address = {Computer-Based Testing: Building the Foundation for Future Assessments, Philadelphia PA}, author = {Way, W. D. and Steffen, M. and Anderson, G. S.} } @booklet {1438, title = {Development and evaluation of online calibration procedures (TCN 96-216)}, year = {1998}, address = {Champaign IL: Algorithm Design and Measurement Services, Inc}, author = {Levine, M. L. and Williams.} } @booklet {1478, title = {Does adaptive testing violate local independence? (Research Report 98-33)}, year = {1998}, address = {Princeton NJ: Educational Testing Service}, author = {Mislevy, R. J. and Chang, Hua-Hua} } @article {177, title = {The effect of item pool restriction on the precision of ability measurement for a Rasch-based CAT: comparisons to traditional fixed length examinations}, journal = {J Outcome Meas}, volume = {2}, number = {2}, year = {1998}, note = {983263801090-655xJournal Article}, pages = {97-122}, abstract = {This paper describes a method for examining the precision of a computerized adaptive test with a limited item pool. Standard errors of measurement ascertained in the testing of simulees with a CAT using a restricted pool were compared to the results obtained in a live paper-and-pencil achievement testing of 4494 nursing students on four versions of an examination of calculations of drug administration. CAT measures of precision were considered when the simulated examine pools were uniform and normal. Precision indices were also considered in terms of the number of CAT items required to reach the precision of the traditional tests. Results suggest that regardless of the size of the item pool, CAT provides greater precision in measurement with a smaller number of items administered even when the choice of items is limited but fails to achieve equiprecision along the entire ability continuum.}, keywords = {*Decision Making, Computer-Assisted, Comparative Study, Computer Simulation, Education, Nursing, Educational Measurement/*methods, Human, Models, Statistical, Psychometrics/*methods}, author = {Halkitis, P. N.} } @conference {962, title = {Effect of item selection on item exposure rates within a computerized classification test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {Kalohn, J.C. and Spray, J. A.} } @conference {1297, title = {An empirical Bayes approach to Mantel-Haenszel DIF analysis: Theoretical development and application to CAT data}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Zwick, R.} } @conference {1242, title = {Essentially unbiased Bayesian estimates in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1998}, address = {San Diego}, author = {Wang, T. and Lau, C. and Hanson, B. 
A.} } @conference {889, title = {Evaluating and insuring measurement precision in adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Davey, T. and Nering, M. L.} } @conference {1182, title = {Evaluation of methods for the use of underutilized items in a CAT environment}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Steffen, M. and Liu, M.} } @conference {1187, title = {An examination of item-level response times from an operational CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {Urbana IL}, author = {Swygert, K.} } @conference {1168, title = {Expected losses for individuals in Computerized Mastery Testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego}, author = {Smith, R. and Lewis, C.} } @booklet {1335, title = {Feasibility studies of two-stage testing in large-scale educational assessment: Implications for NAEP}, year = {1998}, address = {American Institutes for Research, CA}, author = {Bock, R. D. and Zimowski, M. F.} } @booklet {1616, title = {A framework for comparing adaptive test designs}, number = {(Unpublished manuscript)}, year = {1998}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @conference {1027, title = {A framework for exploring and controlling risks associated with test item exposure over time}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego, CA}, author = {Luecht, RM} } @conference {1071, title = {A hybrid method for controlling item exposure in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Nering, M. L. and Davey, T. and Thompson, T.} } @conference {1282, title = {The impact of nonmodel-fitting responses in a realistic CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {Yi, Q. and Nering, M.} } @conference {1019, title = {The impact of scoring flawed items on ability estimation in CAT}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Liu, M. and Steffen, M.} } @inbook {1848, title = {Innovations in computer-based ability testing: Promise, problems and perils}, year = {1998}, address = {In Hakel, M.D. (Ed.) Beyond multiple choice: Alternatives to traditional testing for selection. Hillsdale, NJ: Lawrence Erlbaum Associates.}, author = {J. R.
McBride} } @booklet {344, title = {Item banking}, journal = {Practical Assessment, Research and Evaluation}, volume = {6}, number = {4}, year = {1998}, note = {Using Smart Source Parsing}, abstract = {Discusses the advantages and disadvantages of using item banks while providing useful information to those who are considering implementing an item banking project in their school district. The primary advantage of item banking is in test development. Also describes start-up activities in implementing item banking. (SLD)}, author = {Rudner, L. M.} } @conference {1080, title = {Item development and pretesting in a computer-based testing environment}, booktitle = {Paper presented at the Educational Testing Service Sponsored Colloquium on Computer-Based Testing: Building the Foundation for Future Assessments}, year = {1998}, address = {Philadelphia}, author = {Parshall, C. G.} } @booklet {1367, title = {Item selection in adaptive testing with the sequential probability ratio test (Measurement and Research Department Report, 98-1)}, year = {1998}, note = {[see APM paper, 1999; also reprinted as Chapter 6 in $\#$EG04-01.]}, address = {Arnhem, The Netherlands: Cito.}, author = {Theo Eggen} } @conference {946, title = {Item selection in computerized adaptive testing: Should more discriminating items be used first?}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1998}, address = {San Diego, CA}, author = {Hau, K. T. and Chang, Hua-Hua} } @article {567, title = {Item selection in computerized adaptive testing: Should more discriminating items be used first?}, journal = {Journal of Educational Measurement}, volume = {38}, year = {1998}, pages = {249-266}, author = {Hau, K. T. and Chang, Hua-Hua} } @article {261, title = {Maintaining content validity in computerized adaptive testing}, journal = {Advances in Health Sciences Education}, volume = {3}, number = {1}, year = {1998}, note = {Kluwer Academic Publishers, Netherlands}, pages = {29-41}, abstract = {The authors empirically demonstrate some of the trade-offs which can occur when content balancing is imposed in computerized adaptive testing (CAT) forms or, conversely, when it is ignored. The authors contend that the content validity of a CAT form can actually change across a score scale when content balancing is ignored. However, they caution that efficiency and score precision can be severely reduced by overspecifying content restrictions in a CAT form. The results from 2 simulation studies are presented as a means of highlighting some of the trade-offs that could occur between content and statistical considerations in CAT form assembly. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Luecht, RM and de Champlain, A. and Nungester, R. J.} } @article {633, title = {Measuring change conventionally and adaptively}, journal = {Educational and Psychological Measurement}, volume = {58}, year = {1998}, pages = {882-897}, author = {May, K. and Nicewander, W. A.} } @article {405, title = {A model for optimal constrained adaptive testing}, journal = {Applied Psychological Measurement}, volume = {22}, number = {3}, year = {1998}, note = {Sage Publications, US}, pages = {259-270}, abstract = {A model for constrained computerized adaptive testing is proposed in which the information in the test at the trait level (θ) estimate is maximized subject to a number of possible constraints on the content of the test.
At each item-selection step, a full test is assembled to have maximum information at the current θ estimate, fixing the items already administered. Then the item with maximum information is selected. All test assembly is optimal because a linear programming (LP) model is used that automatically updates to allow for the attributes of the items already administered and the new value of the θ estimator. The LP model also guarantees that each adaptive test always meets the entire set of constraints. A simulation study using a bank of 753 items from the Law School Admission Test showed that the θ estimator for adaptive tests of realistic lengths did not suffer any loss of efficiency from the presence of 433 constraints on the item selection process. }, keywords = {computerized adaptive testing}, author = {van der Linden, W. J. and Reese, L. M.} } @conference {1062, title = {A new approach for the detection of item preknowledge in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {McLeod, L. D. and Lewis, C.} } @conference {1281, title = {Nonmodel-fitting responses and robust ability estimation in a realistic CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego CA}, author = {Yi, Q. and Nering, M.} } @article {714, title = {Optimal design of item pools for computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {22}, year = {1998}, pages = {271-279}, author = {Stocking, M. L. and Swanson, L.} } @article {771, title = {Optimal sequential rules for computer-based instruction}, journal = {Journal of Educational Computing Research}, volume = {19(2)}, year = {1998}, pages = {133-154}, author = {Vos, H. J.} } @article {738, title = {Optimal test assembly of psychological and educational tests}, journal = {Applied Psychological Measurement}, volume = {22}, year = {1998}, pages = {195-211}, author = {van der Linden, W. J.} } @conference {1037, title = {Patterns of item exposure using a randomized CAT algorithm}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego, CA}, author = {Lunz, M. E. and Stahl, J. A.} } @booklet {1541, title = {Person fit based on statistical process control in an adaptive testing environment (Research Report 98-13)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van Krimpen-Stoop, E. M. L. A. and Meijer, R. R.} } @article {806, title = {Practical issues in computerized test assembly}, journal = {Applied Psychological Measurement}, volume = {22}, year = {1998}, pages = {292-302}, author = {Wightman, L. F.} } @article {2119, title = {Properties of ability estimation methods in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {35}, year = {1998}, pages = {109-135}, author = {Wang, T. and Vispoel, W. P.} } @article {798, title = {Protecting the integrity of computerized testing item pools}, journal = {Educational Measurement: Issues and Practice}, volume = {17(4)}, year = {1998}, pages = {17-27}, author = {Way, W.
D.} } @article {760, title = {Psychometric characteristics of computer-adaptive and self-adaptive vocabulary tests: The role of answer feedback and test anxiety}, journal = {Journal of Educational Measurement}, volume = {35}, year = {1998}, pages = {328-347 or 155-167}, author = {Vispoel, W. P.} } @booklet {1384, title = {Quality control of on-line calibration in computerized adaptive testing (Research Report 98-03)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Glas, C. A. W.} } @booklet {1514, title = {The relationship between computer familiarity and performance on computer-based TOEFL test tasks (Research Report 98-08)}, year = {1998}, address = {Princeton NJ: Educational Testing Service}, author = {Taylor, C. and Jamieson, J. and Eignor, D. R. and Kirsch, I.} } @article {759, title = {Reviewing and changing answers on computer-adaptive and self-adaptive vocabulary tests}, journal = {Journal of Educational Measurement}, volume = {35}, year = {1998}, note = {(Also presented at National Council on Measurement in Education, 1996)}, pages = {328-345}, author = {Vispoel, W. P.} } @booklet {1585, title = {Simulating nonmodel-fitting responses in a CAT Environment (Research Report 98-10)}, year = {1998}, note = {$\#$YI-98-10}, address = {Iowa City IA: ACT Inc. (Also presented at National Council on Measurement in Education, 1999: ERIC No. ED 427 042)}, author = {Yi, Q. and Nering, M, L.} } @booklet {1475, title = {Simulating the null distribution of person-fit statistics for conventional and adaptive tests (Research Report 98-02)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {373, title = {Simulating the use of disclosed items in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {35}, number = {1}, year = {1998}, note = {National Council on Measurement in Education, US}, pages = {48-68}, abstract = {Regular use of questions previously made available to the public (i.e., disclosed items) may provide one way to meet the requirement for large numbers of questions in a continuous testing environment, that is, an environment in which testing is offered at test taker convenience throughout the year rather than on a few prespecified test dates. First it must be shown that such use has effects on test scores small enough to be acceptable. In this study simulations are used to explore the use of disclosed items under a worst-case scenario which assumes that disclosed items are always answered correctly. Some item pool and test designs were identified in which the use of disclosed items produces effects on test scores that may be viewed as negligible.}, keywords = {computerized adaptive testing}, author = {Stocking, M. L. and W. C. Ward and Potenza, M. 
T.} } @conference {1136, title = {Some considerations for eliminating biases in ability estimation in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1998}, author = {Samejima, F.} } @conference {1196, title = {Some item response theory to provide scale scores based on linear combinations of testlet scores, for computerized adaptive tests}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Thissen, D.} } @article {627, title = {Some practical examples of computerized adaptive sequential testing}, journal = {Journal of Educational Measurement}, volume = {35}, year = {1998}, pages = {229-249}, author = {Luecht, RM and Nungester, R. J.} } @conference {1074, title = {Some reliability estimators for computerized adaptive tests}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1998}, address = {Urbana, IL}, author = {Nicewander, W. A. and Thomasson, G. L.} } @article {155, title = {Statistical tests for person misfit in computerized adaptive testing}, number = {98-01}, year = {1998}, pages = {28}, institution = {Faculty of Educational Science and Technology, Univeersity of Twente}, address = {Enschede, The Netherlands}, isbn = {98-01}, author = {Glas, C. A. W. and Meijer, R. R. and van Krimpen-Stoop, E. M.} } @booklet {1385, title = {Statistical tests for person misfit in computerized adaptive testing (Research Report 98-01)}, year = {1998}, address = {Enschede, The Netherlands : University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {Glas, C. A. W. and Meijer, R. R. and van Krimpen-Stoop, E. M. L. A.} } @article {739, title = {Stochastic order in dichotomous item response models for fixed, adaptive, and multidimensional tests}, journal = {Psychometrika}, volume = {63}, year = {1998}, pages = {211-226}, author = {van der Linden, W. J.} } @article {632, title = {Swedish Enlistment Battery: Construct validity and latent variable estimation of cognitive abilities by the CAT-SEB}, journal = {International Journal of Selection and Assessment}, volume = {6}, year = {1998}, pages = {107-114}, author = {Mardberg, B. and Carlstedt, B.} } @conference {1081, title = {Test development exposure control for adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1998}, address = {San Diego, CA}, author = {Parshall, C. G. and Davey, T. and Nering, M. L.} } @article {611, title = {Testing word knowledge by telephone to estimate general cognitive aptitude using an adaptive test}, journal = {Intelligence}, volume = {26}, year = {1998}, pages = {91-98}, author = {Legree, P. J. and Fischl, M. A and Gade, P. A. and Wilson, M.} } @booklet {1328, title = {Three response types for broadening the conception of mathematical problem solving in computerized-adaptive tests (Research Report 98-45)}, year = {1998}, note = {$\#$BE98-45 (Also presented at National Council on Measurement in Education, 1998)}, address = {Princeton NJ : Educational Testing Service}, author = {Bennett, R. E. and Morley, M. 
and Quardt, D.} } @booklet {1537, title = {Using response-time constraints to control for differential speededness in adaptive testing (Research Report 98-06)}, year = {1998}, address = {Enschede, The Netherlands: University of Twente, Faculty of Educational Science and Technology, Department of Measurement and Data Analysis}, author = {van der Linden, W. J. and Scrams, D. J. and Schnipke, D. L.} } @conference {1268, title = {The accuracy of examinee judgments of relative item difficulty: Implication for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago}, author = {Wise, S. L. and Freeman, S.A. and Finney, S. J. and Enders, C. K. and Severance, D.D.} } @article {655, title = {Adapting to adaptive testing}, journal = {Personnel Psychology}, volume = {50}, year = {1997}, pages = {171-185}, author = {Overton, R. C. and Harms, H. J. and Taylor, L. R. and Zickar, M.. J.} } @conference {1289, title = {Administering and scoring the computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {A Zara} } @conference {931, title = {Alternate methods of scoring computer-based adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Green, B. F.} } @article {711, title = {An alternative method for scoring adaptive tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {21}, year = {1997}, note = {(Also Educational Testing Service RR 94-48)}, pages = {365-389}, author = {Stocking, M. L.} } @booklet {1551, title = {Applications of Bayesian decision theory to sequential mastery testing (Research Report 97-06)}, year = {1997}, address = {Twente, The Netherlands: Department of Educational Measurement and Data Analysis}, author = {Vos, H. J.} } @conference {1148, title = {Applications of multidimensional adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Montreal, Canada}, author = {Segall, D. O.} } @conference {863, title = {Assessing speededness in variable-length computer-adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Bontempo, B and Julian, E. R and Gorham, J. L.} } @conference {1296, title = {A Bayesian enhancement of Mantel Haenszel DIF analysis for computer adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Zwick, R.} } @conference {1175, title = {Calibration of CAT items administered online for classification: Assumption of local independence}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1997}, address = {Gatlinburg TN}, author = {Spray, J. A. and Parshall, C. G. and Huang, C.-H.} } @booklet {1467, title = {CAST 5 for Windows users{\textquoteright} guide}, year = {1997}, address = {Contract No. "MDA903-93-D-0032, DO 0054. Alexandria, VA: Human Resources Research Organization}, author = {J. R. McBride and Cooper, R. R} } @inbook {1958, title = {CAT-ASVAB cost and benefit analyses}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. 
McBride (Eds.), Computer adaptive testing: From inquiry to operation (pp. 227-236). Washington, DC: American Psychological Association.}, author = {Wise, L. L. and Curran, L. T. and J. R. McBride} } @inbook {1859, title = {CAT-ASVAB operational test and evaluation}, year = {1997}, address = {W. A. Sands, B. K. Waters, and . R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 199-205). Washington DC: American Psychological Association.}, author = {Moreno, K. E.} } @booklet {1482, title = {CATSIB: A modified SIBTEST procedure to detect differential item functioning in computerized adaptive tests (Research report)}, year = {1997}, address = {Newtown, PA: Law School Admission Council}, author = {Nandakumar, R. and Roussos, L.} } @conference {942, title = {Comparability and validity of computerized adaptive testing with the MMPI-2 using a clinical sample}, booktitle = {Paper presented at the 32nd Annual Symposium and Recent Developments in the use of the MMPI-2 and MMPI-A. Minneapolis MN.}, year = {1997}, author = {Handel, R. W. and Ben-Porath, Y. S. and Watt, M.} } @article {66, title = {A comparison of maximum likelihood estimation and expected a posteriori estimation in computerized adaptive testing using the generalized partial credit model}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {58}, number = {1-B}, year = {1997}, pages = {453}, abstract = {A simulation study was conducted to investigate the application of expected a posteriori (EAP) trait estimation in computerized adaptive tests (CAT) based on the generalized partial credit model (Muraki, 1992), and to compare the performance of EAP with maximum likelihood trait estimation (MLE). The performance of EAP was evaluated under different conditions: the number of quadrature points (10, 20, and 30), and the type of prior distribution (normal, uniform, negatively skewed, and positively skewed). The relative performance of the MLE and EAP estimation methods were assessed under two distributional forms of the latent trait, one normal and the other negatively skewed. Also, both the known item parameters and estimated item parameters were employed in the simulation study. Descriptive statistics, correlations, scattergrams, accuracy indices, and audit trails were used to compare the different methods of trait estimation in CAT. The results showed that, regardless of the latent trait distribution, MLE and EAP with a normal prior, a uniform prior, or the prior that matches the latent trait distribution using either 20 or 30 quadrature points provided relatively accurate estimation in CAT based on the generalized partial credit model. However, EAP using only 10 quadrature points did not work well in the generalized partial credit CAT. Also, the study found that increasing the number of quadrature points from 20 to 30 did not increase the accuracy of EAP estimation. Therefore, it appears 20 or more quadrature points are sufficient for accurate EAP estimation. The results also showed that EAP with a negatively skewed prior and positively skewed prior performed poorly for the normal data set, and EAP with positively skewed prior did not provide accurate estimates for the negatively skewed data set. Furthermore, trait estimation in CAT using estimated item parameters produced results similar to those obtained using known item parameters. 
In general, when at least 20 quadrature points are used, EAP estimation with a normal prior, a uniform prior or the prior that matches the latent trait distribution appears to be a good alternative to MLE in the application of polytomous CAT based on the generalized partial credit model. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Chen, S-K.} } @conference {1143, title = {A comparison of testlet-based test designs for computerized adaptive testing}, booktitle = {Paper presented at the meeting of American Educational Research Association}, year = {1997}, address = {Chicago, IL}, author = {Schnipke, D. L. and Reese, L. M.} } @conference {867, title = {Computer assembly of tests so that content reigns supreme}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Case, S. M. and Luecht, RM} } @article {534, title = {Computer-adaptive testing of listening comprehension: A blueprint of CAT Development}, journal = {The Language Teacher Online 21}, volume = {no. 10. }, year = {1997}, note = {.}, author = {Dunkel, P. A.} } @article {767, title = {Computerized adaptive and fixed-item testing of music listening skill: A comparison of efficiency, precision, and concurrent validity}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {43-63}, author = {Vispoel, W. P., and Wang, T.} } @article {768, title = {Computerized adaptive and fixed-item testing of music listening skill: A comparison of efficiency, precision, and concurrent validity}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {43-63}, author = {Vispoel, W. P. and Wang, T. and Bleiler, T.} } @book {347, title = {Computerized adaptive testing: From inquiry to operation}, year = {1997}, note = {References .Using Smart Source Parsingxvii, pp}, publisher = {American Psychological Association}, organization = {American Psychological Association}, address = {Washington, D.C., USA}, abstract = {(from the cover) This book traces the development of computerized adaptive testing (CAT) from its origins in the 1960s to its integration with the Armed Services Vocational Aptitude Battery (ASVAB) in the 1990s. A paper-and-pencil version of the battery (P\&P-ASVAB) has been used by the Defense Department since the 1970s to measure the abilities of applicants for military service. The test scores are used both for initial qualification and for classification into entry-level training opportunities. /// This volume provides the developmental history of the CAT-ASVAB through its various stages in the Joint-Service arena. Although the majority of the book concerns the myriad technical issues that were identified and resolved, information is provided on various political and funding support challenges that were successfully overcome in developing, testing, and implementing the battery into one of the nation{\textquoteright}s largest testing programs. The book provides useful information to professionals in the testing community and everyone interested in personnel assessment and evaluation. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Sands, W. A. and B. K. Waters and J. R. 
McBride} } @article {33, title = {A computerized adaptive testing system for speech discrimination measurement: The Speech Sound Pattern Discrimination Test}, journal = {Journal of the Accoustical Society of America}, volume = {101}, number = {4}, year = {1997}, note = {972575560001-4966Journal Article}, pages = {2289-298}, abstract = {A computerized, adaptive test-delivery system for the measurement of speech discrimination, the Speech Sound Pattern Discrimination Test, is described and evaluated. Using a modified discrimination task, the testing system draws on a pool of 130 items spanning a broad range of difficulty to estimate an examinee{\textquoteright}s location along an underlying continuum of speech processing ability, yet does not require the examinee to possess a high level of English language proficiency. The system is driven by a mathematical measurement model which selects only test items which are appropriate in difficulty level for a given examinee, thereby individualizing the testing experience. Test items were administered to a sample of young deaf adults, and the adaptive testing system evaluated in terms of respondents{\textquoteright} sensory and perceptual capabilities, acoustic and phonetic dimensions of speech, and theories of speech perception. Data obtained in this study support the validity, reliability, and efficiency of this test as a measure of speech processing ability.}, keywords = {*Diagnosis, Computer-Assisted, *Speech Discrimination Tests, *Speech Perception, Adolescent, Adult, Audiometry, Pure-Tone, Human, Middle Age, Psychometrics, Reproducibility of Results}, author = {Bochner, J. and Garrison, W. and Palmer, L. and MacKenzie, D. and Braveman, A.} } @booklet {1602, title = {Computerized adaptive testing through the World Wide Web}, number = {(ERIC No. ED414536)}, year = {1997}, address = {(ERIC No. ED414536)}, author = {Shermis, M. D. and Mzumara, H. and Brown, M. and Lillig, C.} } @conference {1156, title = {Computerized adaptive testing through the World Wide Web}, year = {1997}, author = {Shermis, M. D.} } @inbook {1730, title = {Computerized adaptive testing using the partial credit model for attitude measurement}, year = {1997}, address = {M. Wilson, G. Engelhard Jr and K. Draney (Eds.), Objective measurement: Theory into practice, volume 4. Norwood NJ: Ablex.}, author = {Baek, S. G.} } @conference {1157, title = {Controlling test and computer anxiety: Test performance under CAT and SAT conditions}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Shermis, M. D. and Mzumara, H. and Bublitz, S.} } @inbook {1888, title = {Current and future challenges}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.). Computerized adaptive testing: From inquiry to operation (pp 257-269). Washington DC: American Psychological Association.}, author = {Segall, D. O. and Moreno, K. E.} } @conference {1188, title = {Detecting misbehaving items in a CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago, IL}, author = {Swygert, K.} } @conference {1210, title = {Detection of aberrant response patterns in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {van der Linden, W. 
J.} } @article {522, title = {Developing and scoring an innovative computerized writing assessment}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {21-41}, author = {Davey, T. and Godwin, J., and Mittelholz, D.} } @article {721, title = {Diagnostic adaptive testing: Effects of remedial instruction as empirical validation}, journal = {Journal of Educational Measurement}, volume = {34}, year = {1997}, pages = {3-20}, author = {Tatsuoka, K. K. and Tatsuoka, M. M.} } @article {292, title = {The distribution of indexes of person fit within the computerized adaptive testing environment}, journal = {Applied Psychological Measurement}, volume = {21}, number = {2}, year = {1997}, note = {Journal; Peer Reviewed Journal}, pages = {115-127}, abstract = {The extent to which a trait estimate represents the underlying latent trait of interest can be estimated by using indexes of person fit. Several statistical methods for indexing person fit have been proposed to identify nonmodel-fitting response vectors. These person-fit indexes have generally been found to follow a standard normal distribution for conventionally administered tests. The present investigation found that within the context of computerized adaptive testing (CAT) these indexes tended not to follow a standard normal distribution. As the item pool became less discriminating, as the CAT termination criterion became less stringent, and as the number of items in the pool decreased, the distributions of the indexes approached a standard normal distribution. It was determined that under these conditions the indexes{\textquoteright} distributions approached standard normal distributions because more items were being administered. However, even when over 50 items were administered in a CAT the indexes were distributed in a fashion that was different from what was expected. (PsycINFO Database Record (c) 2006 APA )}, keywords = {Adaptive Testing, Computer Assisted Testing, Fit, Person Environment}, author = {Nering, M. L.} } @article {822, title = {The effect of adaptive administration on the variability of the Mantel-Haenszel measure of differential item functioning}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {412-421}, author = {Zwick, R.} } @article {67, title = {The effect of population distribution and method of theta estimation on computerized adaptive testing (CAT) using the rating scale model}, journal = {Educational \& Psychological Measurement}, volume = {57}, number = {3}, year = {1997}, note = {Sage Publications, US}, pages = {422-439}, abstract = {Investigated the effect of population distribution on maximum likelihood estimation (MLE) and expected a posteriori estimation (EAP) in a simulation study of computerized adaptive testing (CAT) based on D. Andrich{\textquoteright}s (1978) rating scale model. Comparisons were made among MLE and EAP with a normal prior distribution and EAP with a uniform prior distribution within 2 data sets: one generated using a normal trait distribution and the other using a negatively skewed trait distribution. Descriptive statistics, correlations, scattergrams, and accuracy indices were used to compare the different methods of trait estimation. The EAP estimation with a normal prior or uniform prior yielded results similar to those obtained with MLE, even though the prior did not match the underlying trait distribution. 
An additional simulation study based on real data suggested that more work is needed to determine the optimal number of quadrature points for EAP in CAT based on the rating scale model. The choice between MLE and EAP for particular measurement situations is discussed. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Chen, S-K. and Hou, L. Y. and Fitzpatrick, S. J. and Dodd, B. G.} } @article {501, title = {The effect of population distribution and methods of theta estimation on computerized adaptive testing (CAT) using the rating scale model}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {422-439}, author = {Chen, S. and Hou, L. and Fitzpatrick, S. J. and Dodd, B.} } @conference {1147, title = {The effects of motivation on equating adaptive and conventional tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Segall, D. O.} } @inbook {1885, title = {Equating the CAT-ASVAB}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 181-198). Washington DC: American Psychological Association.}, author = {Segall, D. O.} } @conference {1239, title = {Essentially unbiased EAP estimates in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, note = {$\#$WA97-01 PDF file, 225 K}, address = {Chicago}, author = {Wang, T.} } @article {472, title = {Evaluating an automatically scorable, open-ended response type for measuring mathematical reasoning in computer-adaptive tests}, year = {1997}, author = {Bennett, R. E. and Steffen, M. and Singley, M.K. and Morley, M. and Jacquemin, D.} } @conference {1241, title = {Evaluating comparability in computerized adaptive testing: A theoretical framework with an example}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, address = {Chicago}, author = {Wang, T. and Kolen, M. J.} } @inbook {1792, title = {Evaluating item calibration medium in computerized adaptive testing}, year = {1997}, address = {W.A. Sands, B.K. Waters and J.R. McBride, Computerized adaptive testing: From inquiry to operation (pp. 161-168). Washington, DC: American Psychological Association.}, author = {Hetter, R. D. and Segall, D. O. and Bloxom, B. M.} } @conference {1264, title = {Examinee issues in CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, note = {[ERIC ED 408 329]}, address = {Chicago IL}, author = {Wise, S. L.} } @article {668, title = {Flawed items in computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {4}, year = {1997}, note = {(Also Educational Testing Service RR-94-06)}, pages = {79-96}, author = {Potenza, M. T. and Stocking, M. L.} } @conference {984, title = {Getting more precision on computer adaptive testing}, booktitle = {Paper presented at the 62nd Annual meeting of Psychometric Society}, year = {1997}, address = {University of Tennessee, Knoxville, TN}, author = {Krass, I. A.} } @conference {1199, title = {The goal of equity within and between computerized adaptive tests and paper and pencil forms. 
}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Thomasson, G. L.} } @article {331, title = {Health status assessment for the twenty-first century: item response theory, item banking and computer adaptive testing}, journal = {Quality of Life Research}, volume = {6}, number = {6}, year = {1997}, note = {Revicki, D ACella, D FEnglandQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 1997 Aug;6(6):595-600.}, month = {Aug}, pages = {595-600}, edition = {1997/08/01}, abstract = {Health status assessment is frequently used to evaluate the combined impact of human immunodeficiency virus (HIV) disease and its treatment on functioning and well-being from the patient{\textquoteright}s perspective. No single health status measure can efficiently cover the range of problems in functioning and well-being experienced across HIV disease stages. Item response theory (IRT), item banking and computer adaptive testing (CAT) provide a solution to measuring health-related quality of life (HRQoL) across different stages of HIV disease. IRT allows us to examine the response characteristics of individual items and the relationship between responses to individual items and the responses to each other item in a domain. With information on the response characteristics of a large number of items covering a HRQoL domain (e.g. physical function, and psychological well-being), and information on the interrelationships between all pairs of these items and the total scale, we can construct more efficient scales. Item banks consist of large sets of questions representing various levels of a HRQoL domain that can be used to develop brief, efficient scales for measuring the domain. CAT is the application of IRT and item banks to the tailored assessment of HRQoL domains specific to individual patients. Given the results of IRT analyses and computer-assisted test administration, more efficient and brief scales can be used to measure multiple domains of HRQoL for clinical trials and longitudinal observational studies.}, keywords = {*Health Status, *HIV Infections/diagnosis, *Quality of Life, Diagnosis, Computer-Assisted, Disease Progression, Humans, Psychometrics/*methods}, isbn = {0962-9343 (Print)}, author = {Revicki, D. A. and Cella, D. F.} } @conference {1114, title = {Identifying similar item content clusters on multiple test forms}, booktitle = {Paper presented at the Psychometric Society meeting}, year = {1997}, address = {Gatlinburg, TN, June}, author = {Reckase, M. D. and Thompson, T.D. and Nering, M.} } @conference {1220, title = {Improving the quality of music aptitude tests through adaptive administration of items}, booktitle = {Paper presented at Multidisciplinary Perspectives on Musicality: The Seashore Symposium}, year = {1997}, address = {University of Iowa, Iowa City IA}, author = {Vispoel, W. P.} } @booklet {1504, title = {Incorporating content constraints into a multi-stage adaptive testlet design: LSAC report}, year = {1997}, address = {Newtown, PA: Law School Admission Council}, author = {Reese, L. M. and Schnipke, D. L. and Luebke, S. W.} } @conference {1167, title = {Incorporating decision consistency into Bayesian sequential testing}, booktitle = {Paper presented at the annual meeting of National Council on Measurement in Education}, year = {1997}, address = {Chicago}, author = {Smith, R. 
and Lewis, C.} } @article {667, title = {An investigation of self-adapted testing in a Spanish high school population}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {210-221}, author = {Ponsoda, V. and Wise, S. L. and Olea, J. and Revuelta, J.} } @inbook {196, title = {Item exposure control in CAT-ASVAB}, booktitle = {Computerized adaptive testing: From inquiry to operation}, year = {1997}, pages = {141-144}, publisher = {American Psychological Association}, organization = {American Psychological Association}, address = {Washington D.C., USA}, abstract = {Describes the method used to control item exposure in computerized adaptive testing-Armed Services Vocational Aptitude Battery (CAT-ASVAB). The method described was developed specifically to ensure that CAT-ASVAB items were exposed no more often than the items in the printed ASVAB{\textquoteright}s alternate forms, ensuring that CAT-ASVAB is no more vulnerable than printed ASVAB forms to compromise from item exposure. (PsycINFO Database Record (c) 2010 APA, all rights reserved)}, author = {Hetter, R. D. and Sympson, J. B.}, editor = {J. R. McBride} } @inbook {1891, title = {Item pool development and evaluation}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 117-130). Washington DC: American Psychological Association.}, author = {Segall, D. O. and Moreno, K. E. and Hetter, D. H.} } @conference {969, title = {Item pool development and maintenance}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Kingsbury, G. G.} } @booklet {1434, title = {Linking scores for computer-adaptive and paper-and-pencil administrations of the SAT (Research Report No 97-12)}, year = {1997}, note = {$\#$LA97-12}, address = {Princeton NJ: Educational Testing Service}, author = {Lawrence, I. and Feigenbaum, M.} } @conference {1006, title = {Maintaining a CAT item pool with operational data}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Levine, M. L. and Segall, D. O. and Williams, B. A.} } @conference {1092, title = {Maintaining item and test security in a CAT environment: A simulation study}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Patsula, L. N. and Steffen, M.} } @conference {961, title = {Mathematical programming approaches to computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Jones, D. H.} } @booklet {1550, title = {A minimax sequential procedure in the context of computerized adaptive mastery testing (Research Report 97-07)}, year = {1997}, address = {Twente, The Netherlands: Department of Educational Measurement and Data Analysis}, author = {Vos, H. J.} } @booklet {1468, title = {Modification of the Computerized Adaptive Screening Test (CAST) for use by recruiters in all military services}, year = {1997}, address = {Final Technical Report FR-WATSD-97-24, Contract No. MDA903-93-D-0032, DO 0054. Alexandria VA: Human Resources Research Organization.}, author = {J. R. McBride and Cooper, R. 
R} } @booklet {1530, title = {Multidimensional adaptive testing with a minimum error-variance criterion (Research Report 97-03)}, year = {1997}, address = {Enschede, The Netherlands: University of Twente, Department of Educational Measurement and Data Analysis}, author = {van der Linden, W. J.} } @conference {1209, title = {Multidimensional adaptive testing with a minimum error-variance criterion}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1997}, address = {Chicago}, author = {van der Linden, W. J.} } @conference {872, title = {Multi-stage CAT with stratified design}, booktitle = {Paper presented at the annual meeting of the Psychometric Society. Gatlinburg TN.}, year = {1997}, author = {Chang, Hua-Hua and Ying, Z.} } @article {496, title = {Nonlinear sequential designs for logistic item response theory models with applications to computerized adaptive tests}, journal = {The Annals of Statistics.}, year = {1997}, author = {Chang, Hua-Hua and Ying, Z.} } @article {369, title = {On-line performance assessment using rating scales}, journal = {Journal of Outcomes Measurement}, volume = {1}, number = {3}, year = {1997}, note = {1090-655X (Print)Journal Article}, pages = {173-191}, abstract = {The purpose of this paper is to report on the development of the on-line performance assessment instrument--the Assessment of Motor and Process Skills (AMPS). Issues that will be addressed in the paper include: (a) the establishment of the scoring rubric and its implementation in an extended Rasch model, (b) training of raters, (c) validation of the scoring rubric and procedures for monitoring the internal consistency of raters, and (d) technological implementation of the assessment instrument in a computerized program.}, keywords = {*Outcome Assessment (Health Care), *Rehabilitation, *Software, *Task Performance and Analysis, Activities of Daily Living, Humans, Microcomputers, Psychometrics, Psychomotor Performance}, author = {Stahl, J. and Shumway, R. and Bergstrom, B. and Fisher, A.} } @book {1658, title = {Optimization methods in computerized adaptive testing}, year = {1997}, address = {Unpublished doctoral dissertation, Rutgers University, New Brunswick NJ}, author = {Cordova, M. J.} } @conference {1265, title = {Overview of practical issues in a CAT program}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, note = {[ERIC ED 408 330]}, address = {Chicago IL}, author = {Wise, S. L.} } @conference {1086, title = {An overview of the LSAC CAT research agenda}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Pashley, P.} } @conference {1033, title = {Overview of the USMLE Step 2 computerized field test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Luecht, RM and Nungester, R. J.} } @inbook {1837, title = {Policy and program management perspective}, year = {1997}, address = {W.A. Sands, B.K. Waters, and J.R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation. Washington, DC: American Psychological Association.}, author = {Martin, C.J. and Hoshaw, C.R.} } @inbook {1854, title = {Preliminary psychometric research for CAT-ASVAB: Selecting an adaptive testing strategy}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. 
McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 83-95). Washington DC: American Psychological Association.}, author = {J. R. McBride and Wetzel, C. D. and Hetter, R. D.} } @conference {1248, title = {Protecting the integrity of the CAT item pool}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Way, W. D.} } @conference {938, title = {Psychometric mode effects and fit issues with respect to item difficulty estimates}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Hadidi, A. and Luecht, RM} } @inbook {1890, title = {Psychometric procedures for administering CAT-ASVAB}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 131-140). Washington D.C.: American Psychological Association.}, author = {Segall, D. O. and Moreno, K. E. and Bloxom, B. M. and Hetter, R. D.} } @conference {891, title = {Realistic simulation procedures for item response data}, booktitle = {In T. Miller (Chair), High-dimensional simulation of item response data for CAT research. Psychometric Society}, year = {1997}, note = {Symposium presented at the annual meeting of the Psychometric Society, Gatlinburg TN.}, address = {Gatlinburg TN}, author = {Davey, T. and Nering, M. and Thompson, T.} } @conference {1186, title = {Relationship of response latency to test design, examinee ability, and item difficulty in computer-based test administration}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Swanson, D. B. and Featherman, C. M. and Case, A. M. and Luecht, RM and Nungester, R.} } @inbook {1860, title = {Reliability and construct validity of CAT-ASVAB}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.). Computerized adaptive testing: From inquiry to operation (pp. 169-179). Washington DC: American Psychological Association.}, author = {Moreno, K. E. and Segall, D. O.} } @inbook {270, title = {Research antecedents of applied adaptive testing}, booktitle = {Computerized adaptive testing: From inquiry to practice}, year = {1997}, pages = {47-57}, publisher = {American Psychological Association}, organization = {American Psychological Association}, edition = {xviii}, address = {Washington D.C. USA}, abstract = {(from the chapter) This chapter sets the stage for the entire computerized adaptive testing Armed Services Vocational Aptitude Battery (CAT-ASVAB) development program by describing the state of the art immediately preceding its inception. By the mid-1970s, a great deal of research had been conducted that provided the technical underpinnings needed to develop adaptive tests, but little research had been done to corroborate empirically the promising results of theoretical analyses and computer simulation studies. In this chapter, the author summarizes much of the important theoretical and simulation research prior to 1977. In doing so, he describes a variety of approaches to adaptive testing, and shows that while many methods for adaptive testing had been proposed, few practical attempts had been made to implement it. 
Furthermore, the few instances of adaptive testing were based primarily on traditional test theory, and were developed in laboratory settings for purposes of basic research. The most promising approaches, those based on item response theory and evaluated analytically or by means of computer simulations, remained to be proven in the crucible of live testing. (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {J. R. McBride}, editor = {B. K. Waters and J. R. McBride} } @article {371, title = {Revising item responses in computerized adaptive tests: A comparison of three models}, journal = {Applied Psychological Measurement}, volume = {21}, number = {2}, year = {1997}, note = {Sage Publications, US}, pages = {129-142}, abstract = {Interest in the application of large-scale computerized adaptive testing has focused attention on issues that arise when theoretical advances are made operational. One such issue is that of the order in which examinees address questions within a test or separately timed test section. In linear testing, this order is entirely under the control of the examinee, who can look ahead at questions and return and revise answers to questions. Using simulation, this study investigated three models that permit restricted examinee control over revising previous answers in the context of adaptive testing. Even under a worst-case model of examinee revision behavior, two of the models for permitting item revisions worked well in preserving test fairness and accuracy. One model studied may also preserve some cognitive processing styles developed by examinees for a linear testing environment. }, keywords = {computerized adaptive testing}, author = {Stocking, M. L.} } @article {687, title = {The role of item feedback in self-adapted testing}, journal = {Educational and Psychological Measurement}, volume = {57}, year = {1997}, pages = {85-98}, author = {Roos, L. L. and Wise, S. L. and Plake, B. S.} } @article {683, title = {Self-adapted testing: Improving performance by modifying tests instead of examinees}, journal = {Stress \& Coping: An International Journal}, volume = {10(1)}, year = {1997}, pages = {83-104}, abstract = {This paper describes self-adapted testing and some of the evidence concerning its effects, presents possible theoretical explanations for those effects, and discusses some of the practical concerns regarding self-adapted testing. Self-adapted testing is a variant of computerized adaptive testing in which the examinee makes dynamic choices about the difficulty of the items he or she attempts. Self-adapted testing generates scores that are, in contrast to computerized adaptive tests and fixed-item tests, uncorrelated with a measure of trait test anxiety. This lack of correlation with an irrelevant attribute of the examinee is evidence of an improvement in the construct validity of the scores. This improvement comes at the cost of a decrease in testing efficiency. The interaction between test anxiety and test administration mode is more consistent with an interference theory of test anxiety than a deficit theory. Some of the practical concerns regarding self-adapted testing can be ruled out logically, but others await empirical investigation.}, author = {Rocklin, T.} } @booklet {1628, title = {Simulating the use of disclosed items in computerized adaptive testing (Research Report 97-10)}, year = {1997}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L. and W. C. 
Ward and Potenza, M. T.} } @conference {1072, title = {Simulation of realistic ability vectors}, booktitle = {Paper presented at the Psychometric Society meeting}, year = {1997}, address = {Gatlinburg TN}, author = {Nering, M. and Thompson, T.D. and Davey, T.} } @conference {1128, title = {A simulation study of the use of the Mantel-Haenszel and logistic regression procedures for assessing DIF in a CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Ross, L. P. and Nandakumar, R. and Clauser, B. E.} } @article {477, title = {Some new item selection criteria for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {22}, year = {1997}, pages = {203-226}, author = {Berger, M. P. F. and Veerkamp, W. J. J.} } @article {754, title = {Some new item selection criteria for adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {22}, year = {1997}, pages = {203-226}, author = {Veerkamp, W. J. J. and Berger, M. P. F.} } @conference {970, title = {Some questions that must be addressed to develop and maintain an item pool for use in an adaptive test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1997}, address = {Chicago IL}, author = {Kingsbury, G. G.} } @book {1707, title = {Statistical methods for computerized adaptive testing}, year = {1997}, address = {Unpublished doctoral dissertation, University of Twente, Enschede, The Netherlands}, author = {Veerkamp, W. J. J.} } @inbook {1847, title = {Technical perspective}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation (pp. 29-44). Washington, DC: American Psychological Association.}, author = {J. R. McBride} } @article {678, title = {Una soluci{\'o}n a la estimaci{\'o}n inicial en los tests adaptivos informatizados [A solution to initial estimation in CATs.] }, journal = {Revista Electr{\'o}nica de Metodolog{\'\i}a Aplicada}, volume = {2}, year = {1997}, pages = {1-6}, author = {Revuelta, J. and Ponsoda, V.} } @booklet {1606, title = {Unidimensional approximations for a computerized adaptive test when the item pool and latent space are multidimensional (Research Report 97-5)}, year = {1997}, address = {Iowa City IA: ACT Inc}, author = {Spray, J. A. and Abdel-Fattah, A. A. and Huang, C.-Y. and Lau, CA} } @conference {290, title = {Validation of CATSIB to investigate DIF of CAT data}, booktitle = {annual meeting of the American Educational Research Association}, year = {1997}, address = {Chicago, IL. USA}, abstract = {This paper investigates the performance of CATSIB (a modified version of the SIBTEST computer program) to assess differential item functioning (DIF) in the context of computerized adaptive testing (CAT). One of the distinguishing features of CATSIB is its theoretically built-in regression correction to control for the Type I error rates when the distributions of the reference and focal groups differ on the intended ability. This phenomenon is also called impact. The Type I error rate of CATSIB with the regression correction (WRC) was compared with that of CATSIB without the regression correction (WORC) to see if the regression correction was indeed effective. Also of interest was the power level of CATSIB after the regression correction. 
The subtest size was set at 25 items, and sample size, the impact level, and the amount of DIF were varied. Results show that the regression correction was very useful in controlling for the Type I error, CATSIB WORC had inflated observed Type I errors, especially when impact levels were high. The CATSIB WRC had observed Type I error rates very close to the nominal level of 0.05. The power rates of CATSIB WRC were impressive. As expected, the power increased as the sample size increased and as the amount of DIF increased. Even for small samples with high impact rates, power rates were 64\% or higher for high DIF levels. For large samples, power rates were over 90\% for high DIF levels. (Contains 12 tables and 7 references.) (Author/SLD)}, keywords = {computerized adaptive testing}, author = {Nandakumar, R. and Roussos, L. A.} } @inbook {1892, title = {Validation of the experimental CAT-ASVAB system}, year = {1997}, address = {W. A. Sands, B. K. Waters, and J. R. McBride (Eds.), Computerized adaptive testing: From inquiry to operation. Washington, DC: American Psychological Association.}, author = {Segall, D. O. and Moreno, K. E. and Kieckhaefer, W. F. and Vicino, F. L. and J. R. McBride} } @inbook {1756, title = {Adaptive assessment and training using the neighbourhood of knowledge states}, year = {1996}, address = {Frasson, C. and Gauthier, G. and Lesgold, A. (eds.) Intelligent Tutoring Systems, Third International Conference, ITS{\textquoteright}96, Montral, Canada, June 1996 Proceedings. Lecture Notes in Computer Science 1086. Berlin Heidelberg: Springer-Verlag 578-587.}, author = {Dowling, C. E. and Hockemeyer, C. and Ludwig, A .H.} } @inbook {1747, title = {Adaptive assessment using granularity hierarchies and Bayesian nets}, year = {1996}, address = {Frasson, C. and Gauthier, G. and Lesgold, A. (Eds.) Intelligent Tutoring Systems, Third International Conference, ITS{\textquoteright}96, Montr{\'e}al, Canada, June 1996 Proceedings. Lecture Notes in Computer Science 1086. Berlin Heidelberg: Springer-Verlag 569-577.}, author = {Collins, J. A. and Greer, J. E. and Huang, S. X.} } @book {1657, title = {Adaptive testing with granularity}, year = {1996}, address = {Masters thesis, University of Saskatchewan, Department of Computer Science}, author = {Collins, J. A.} } @article {710, title = {An alternative method for scoring adaptive tests}, journal = {Journal of Educational and Behavioral Statistics}, volume = {21}, year = {1996}, note = {(Also Educational Testing Service RR 94-48.)}, pages = {365-389}, author = {Stocking, M. L.} } @article {736, title = {Bayesian item selection criteria for adaptive testing}, journal = {Psychometrika}, volume = {63}, year = {1996}, pages = {201-216}, author = {van der Linden, W. J.} } @booklet {1529, title = {Bayesian item selection criteria for adaptive testing (Research Report 96-01)}, year = {1996}, address = {Twente, The Netherlands: Department of Educational Measurement and Data Analysis}, author = {van der Linden, W. J.} } @conference {871, title = {Building a statistical foundation for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1996}, address = {Banff, Alberta, Canada}, author = {Chang, Hua-Hua and Ying, Z.} } @conference {1127, title = {Can examinees use a review option to positively bias their scores on a computerized adaptive test? 
}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {Rocklin, T. R. and Vispoel, W. P. and Wang, T. and Bleiler, T. L.} } @book {1706, title = {A comparison of adaptive self-referenced testing and classical approaches to the measurement of individual change}, year = {1996}, address = {Unpublished doctoral dissertation, University of Minnesota}, author = {VanLoy, W. J.} } @article {367, title = {Comparison of SPRT and sequential Bayes procedures for classifying examinees into two categories using a computerized test}, journal = {Journal of Educational \& Behavioral Statistics}, volume = {21}, year = {1996}, pages = {405-414}, author = {Spray, J. A. and Reckase, M. D.} } @conference {382, title = {A comparison of the traditional maximum information method and the global information method in CAT item selection}, booktitle = {annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York, NY USA}, keywords = {computerized adaptive testing, item selection}, author = {Tang, K. L.} } @article {700, title = {Computerized adaptive skill assessment in a statewide testing program}, journal = {Journal of Research on Computing in Education}, volume = {29(1)}, year = {1996}, pages = {49-67}, author = {Shermis, M. D. and Stemmer, P. M. and Webb, P. M.} } @booklet {1369, title = {Computerized adaptive testing for classifying examinees into three categories (Measurement and Research Department Rep 96-3)}, year = {1996}, note = {$\#$EG96-3 . [Reprinted in Chapter 5 in $\#$EG04-01]}, address = {Arnhem, The Netherlands: Cito}, author = {Theo Eggen and Straetmans, G. J. J. M.} } @article {699, title = {Computerized adaptive testing for reading assessment and diagnostic assessment}, journal = {Journal of Developmental Education}, number = {20}, year = {1996}, pages = {18-20}, author = {Shermis, M. D. and et al.} } @article {479, title = {Computerized adaptive testing for the national certification examination}, journal = {AANA.J}, volume = {64}, year = {1996}, pages = {119-124}, author = {Bergstrom, Betty A.} } @article {476, title = {Computerized adaptive testing for the national certification examination}, year = {1996}, author = {Bergstrom, Betty A.} } @conference {1163, title = {Computing scores for incomplete GRE General computer adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {Slater, S. C. and Schaffer, G.A.} } @article {686, title = {Conducting self-adapted testing using MicroCAT}, journal = {Educational and Psychological Measurement}, volume = {56}, year = {1996}, pages = {821-827}, author = {Roos, L. L. and Wise, S. L. and Yoes, M. E. and Rocklin, T. R.} } @conference {894, title = {Constructing adaptive tests to parallel conventional programs}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Davey, T. and Thomas, L.} } @inbook {1795, title = {A content-balanced adaptive testing algorithm for computer-based training systems}, year = {1996}, address = {Frasson, C. and Gauthier, G. and Lesgold, A. (Eds.), Intelligent Tutoring Systems, Third International Conference, ITS{\textquoteright}96, Montr{\'e}al, Canada, June 1996 Proceedings. 
Lecture Notes in Computer Science 1086. Berlin Heidelberg: Springer-Verlag 306-314.}, author = {Huang, S. X.} } @conference {1263, title = {A critical analysis of the argument for and against item review in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Wise, S. L.} } @conference {1052, title = {Current research in computer-based testing for personnel selection and classification in the United States}, booktitle = {Invited address to the Centre for Recruitment and Selection, Belgian Armed Forces"}, year = {1996}, author = {J. R. McBride} } @article {216, title = {Dispelling myths about the new NCLEX exam}, journal = {Recruitment, Retention, and Restructuring Report}, volume = {9}, number = {1}, year = {1996}, note = {Journal Article}, month = {Jan-Feb}, pages = {6-7}, abstract = {The new computerized NCLEX system is working well. Most new candidates, employers, and board of nursing representatives like the computerized adaptive testing system and the fast report of results. But, among the candidates themselves some myths have grown which cause them needless anxiety.}, keywords = {*Educational Measurement, *Licensure, Humans, Nursing Staff, Personnel Selection, United States}, author = {Johnson, S. H.} } @article {23, title = {Dynamic scaling: An ipsative procedure using techniques from computer adaptive testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {56}, number = {10-B}, year = {1996}, pages = {5824}, abstract = {The purpose of this study was to create a prototype method for scaling items using computer adaptive testing techniques and to demonstrate the method with a working model program. The method can be used to scale items, rank individuals with respect to the scaled items, and to re-scale the items with respect to the individuals{\textquoteright} responses. When using this prototype method, the items to be scaled are part of a database that contains not only the items, but measures of how individuals respond to each item. After completion of all presented items, the individual is assigned an overall scale value which is then compared with each item responded to, and an individual "error" term is stored with each item. After several individuals have responded to the items, the item error terms are used to revise the placement of the scaled items. This revision feature allows the natural adaptation of one general list to reflect subgroup differences, for example, differences among geographic areas or ethnic groups. It also provides easy revision and limited authoring of the scale items by the computer program administrator. This study addressed the methodology, the instrumentation needed to handle the scale-item administration, data recording, item error analysis, and scale-item database editing required by the method, and the behavior of a prototype vocabulary test in use. Analyses were made of item ordering, response profiles, item stability, reliability and validity. Although slow, the movement of unordered words used as items in the prototype program was accurate as determined by comparison with an expert word ranking. Person scores obtained by multiple administrations of the prototype test were reliable and correlated at.94 with a commercial paper-and-pencil vocabulary test, while holding a three-to-one speed advantage in administration. 
Although based upon self-report data, dynamic scaling instruments like the model vocabulary test could be very useful for self-assessment, for pre (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Berg, S. R.} } @conference {826, title = {Effect of altering passing score in CAT when unidimensionality is violated}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, month = {April}, address = {New York NY}, author = {Abdel-Fattah, A. A. and Lau, CA and Spray, J. A.} } @article {149, title = {The effect of individual differences variables on the assessment of ability for Computerized Adaptive Testing}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {57}, number = {6-B}, year = {1996}, pages = {4085}, abstract = {Computerized Adaptive Testing (CAT) continues to gain momentum as the accepted testing modality for a growing number of certification, licensure, education, government and human resource applications. However, the developers of these tests have for the most part failed to adequately explore the impact of individual differences such as test anxiety on the adaptive testing process. It is widely accepted that non-cognitive individual differences variables interact with the assessment of ability when using written examinations. Logic would dictate that individual differences variables would equally affect CAT. Two studies were used to explore this premise. In the first study, 507 examinees were given a test anxiety survey prior to taking a high stakes certification exam using CAT or using a written format. All examinees had already completed their course of study, and the examination would be their last hurdle prior to being awarded certification. High test anxious examinees performed worse than their low anxious counterparts on both testing formats. The second study replicated the finding that anxiety depresses performance in CAT. It also addressed the differential effect of anxiety on within test performance. Examinees were candidates taking their final certification examination following a four year college program. Ability measures were calculated for each successive part of the test for 923 subjects. Within subject performance varied depending upon test position. High anxious examinees performed poorly at all points in the test, while low and medium anxious examinee performance peaked in the middle of the test. If test anxiety and performance measures were actually the same trait, then low anxious individuals should have performed equally well throughout the test. The observed interaction of test anxiety and time on task serves as strong evidence that test anxiety has motivationally mediated as well as cognitively mediated effects. The results of the studies are di (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Gershon, R. C.} } @conference {1221, title = {Effects of answer feedback and test anxiety on the psychometric and motivational characteristics of computer-adaptive and self-adaptive vocabulary tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education.}, year = {1996}, note = {$\#$VI96-01}, author = {Vispoel, W. P. and Brunsman, B. and Forte, E. 
and Bleiler, T.} } @conference {1226, title = {Effects of answer review and test anxiety on the psychometric and motivational characteristics of computer-adaptive and self-adaptive vocabulary tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Vispoel, W. and Forte, E. and Boo, J.} } @conference {952, title = {The effects of methods of theta estimation, prior distribution, and number of quadrature points on CAT using the graded response model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York NY}, author = {Hou, L. and Chen, S. and Dodd, B. G. and Fitzpatrick, S. J.} } @book {1686, title = {The effects of person misfit in computerized adaptive testing}, year = {1996}, address = {Unpublished doctoral dissertation, University of Minnesota, Minneapolis}, author = {Nering, M. L.} } @conference {921, title = {Effects of randomesque item selection on CAT item exposure rates and proficiency estimation under 1- and 2-PL models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Featherman, C. M. and Subhiyah, R. G. and Hadadi, A.} } @conference {1116, title = {An evaluation of a two-stage testlet design for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1996}, address = {Banff, Alberta, Canada}, author = {Reese, L. M. and Schnipke, D. L.} } @article {58, title = {A global information approach to computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {20}, number = {3}, year = {1996}, pages = {213-229}, abstract = {based on Fisher information (or item information). At each stage, an item is selected to maximize the Fisher information at the currently estimated trait level ($\theta$). However, this application of Fisher information could be much less efficient than assumed if the estimators are not close to the true $\theta$, especially at early stages of an adaptive test when the test length (number of items) is too short to provide an accurate estimate for true $\theta$. It is argued here that selection procedures based on global information should be used, at least at early stages of a test when $\theta$ estimates are not likely to be close to the true $\theta$. For this purpose, an item selection procedure based on average global information is proposed. Results from pilot simulation studies comparing the usual maximum item information item selection with the proposed global information approach are reported, indicating that the new method leads to improvement in terms of bias and mean squared error reduction under many circumstances. Index terms: computerized adaptive testing, Fisher information, global information, information surface, item information, item response theory, Kullback-Leibler information, local information, test information.}, isbn = {0146-6216}, author = {Chang, Hua-Hua and Ying, Z.} } @article {2032, title = {A Global Information Approach to Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {20}, year = {1996}, pages = {213-229}, author = {Chang, H.-H. 
and Ying, Z.} } @conference {1032, title = {Heuristic-based CAT: Balancing item information, content, and exposure}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {Luecht, RM and Hadadi, A. and Nungester, R. J.} } @conference {1034, title = {Heuristic-based CAT: Balancing item information, content and exposure}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {Luecht, RM and Hadadi, A. and Nungester, R. J.} } @conference {1035, title = {Heuristics based CAT: Balancing item information, content, and exposure}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {Luecht, RM and Nungester, R. J. and Hadadi, A.} } @conference {968, title = {Item review and adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Kingsbury, G. G.} } @article {137, title = {Methodologic trends in the healthcare professions: computer adaptive and computer simulation testing}, journal = {Nurse Education}, volume = {21}, number = {4}, year = {1996}, note = {Forker, J EMcDonald, M EUnited statesNurse educatorNurse Educ. 1996 Jul-Aug;21(4):13-4.}, month = {Jul-Aug}, pages = {13-4}, edition = {1996/07/01}, abstract = {Assessing knowledge and performance on computer is rapidly becoming a common phenomenon in testing and measurement. Computer adaptive testing presents an individualized test format in accordance with the examinee{\textquoteright}s ability level. The efficiency of the testing process enables a more precise estimate of performance, often with fewer items than traditional paper-and-pencil testing methodologies. Computer simulation testing involves performance-based, or authentic, assessment of the examinee{\textquoteright}s clinical decision-making abilities. The authors discuss the trends in assessing performance through computerized means and the application of these methodologies to community-based nursing practice.}, keywords = {*Clinical Competence, *Computer Simulation, Computer-Assisted Instruction/*methods, Educational Measurement/*methods, Humans}, isbn = {0363-3624 (Print)0363-3624 (Linking)}, author = {Forker, J. E. and McDonald, M. E.} } @article {677, title = {Metodos sencillos para el control de las tasas de exposicion en tests adaptativos informatizados [Simple methods for item exposure control in CATs]}, journal = {Psicologica}, volume = {17}, year = {1996}, pages = {161-172}, author = {Revuelta, J. and Ponsoda, V.} } @booklet {1479, title = {Missing responses and IRT ability estimation: Omits, choice, time limits, and adaptive testing (Research Report RR-96-30-ONR)}, year = {1996}, address = {Princeton NJ: Educational Testing Service}, author = {Mislevy, R. J. 
and Wu, P.-K.} } @conference {870, title = {A model for score maximization within a computerized adaptive testing environment}, booktitle = {Paper presented at the annual meeting of the NMCE}, year = {1996}, address = {New York NY}, author = {Chang, Hua-Hua} } @conference {1252, title = {Modifying the NCLEXTM CAT item selection algorithm to improve item exposure}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Way, W. D. and A Zara and Leahy, J.} } @article {353, title = {Multidimensional adaptive testing}, journal = {Psychometrika}, volume = {61}, number = {2}, year = {1996}, note = {Peer Reviewed Journalhttp://www.psychometrika.org/}, pages = {331-354}, abstract = {Maximum likelihood and Bayesian procedures for item selection and scoring of multidimensional adaptive tests are presented. A demonstration using simulated response data illustrates that multidimensional adaptive testing (MAT) can provide equal or higher reliabilities with about one-third fewer items than are required by one-dimensional adaptive testing (OAT). Furthermore, holding test-length constant across the MAT and OAT approaches, substantial improvements in reliability can be obtained from multidimensional assessment. A number of issues relating to the operational use of multidimensional adaptive testing are discussed.}, author = {Segall, D. O.} } @article {696, title = {Multidimensional adaptive testing}, journal = {Psychometrika}, volume = {61}, year = {1996}, pages = {331-354}, author = {Segall, D. O.} } @conference {917, title = {Multidimensional computer adaptive testing}, booktitle = {Paper presented at the Annual Meeting of the American Educational Research Association}, year = {1996}, note = {$\#$FA96-02}, address = {New York NY}, author = {Fan, M. and Hsu, Y.} } @article {259, title = {Multidimensional computerized adaptive testing in a certification or licensure context}, journal = {Applied Psychological Measurement}, volume = {20}, number = {4}, year = {1996}, note = {Sage Publications, US}, pages = {389-404}, abstract = {(from the journal abstract) Multidimensional item response theory (MIRT) computerized adaptive testing, building on a recent work by D. O. Segall (1996), is applied in a licensing/certification context. An example of a medical licensure test is used to demonstrate situations in which complex, integrated content must be balanced at the total test level for validity reasons, but items assigned to reportable subscore categories may be used under a MIRT adaptive paradigm to improve the reliability of the subscores. A heuristic optimization framework is outlined that generalizes to both univariate and multivariate statistical objective functions, with additional systems of constraints included to manage the content balancing or other test specifications on adaptively constructed test forms. Simulation results suggested that a multivariate treatment of the problem, although complicating somewhat the objective function used and the estimation of traits, nonetheless produces advantages from a psychometric perspective. 
(PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Luecht, RM} } @article {2033, title = {Multidimensional Computerized Adaptive Testing in a Certification or Licensure Context}, journal = {Applied Psychological Measurement}, volume = {20}, number = {4}, year = {1996}, pages = {389-404}, author = {Luecht, RM} } @conference {922, title = {New algorithms for item selection and exposure and proficiency estimation under 1- and 2-PL models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {New York}, author = {Featherman, C. M. and Subhiyah, R. G. and Hadadi, A.} } @booklet {1626, title = {Optimal design of item pools for computerized adaptive testing (Research Report 96-34)}, year = {1996}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L. and Swanson, L.} } @conference {895, title = {Person-fit indices and their role in the CAT environment}, booktitle = {Paper presented at the Annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York NY}, author = {David, L. A. and Lewis, C.} } @conference {1060, title = {Person-fit indices and their role in the CAT environment}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {McLeod, L. D. and Lewis, C.} } @article {645, title = {Practical issues in large-scale computerized adaptive testing}, journal = {Applied Measurement in Education}, volume = {9}, year = {1996}, pages = {287-304}, author = {Mills, C. N., and Stocking, M. L.} } @booklet {1404, title = {Preliminary cost-effectiveness analysis of alternative ASVAB testing concepts at MET sites}, year = {1996}, address = {Interim report to Defense Manpower Data Center. Fairfax, VA: Lewin-VHI, Inc.}, author = {Hogan, P.F. and Dall, T. and J. R. McBride} } @article {652, title = {Propiedades psicometricas du un test adaptivo informatizado do vocabulario ingles [Psychometric properties of a computerized adaptive tests for the measurement of English vocabulary]}, journal = {Estudios de Psicologica}, volume = {55}, year = {1996}, note = {[In Spanish]}, pages = {61-73}, author = {Olea., J. and Ponsoda, V. and Revuelta, J. and Belchi, J.} } @booklet {1346, title = {Recursive maximum likelihood estimation, sequential design, and computerized adaptive testing}, year = {1996}, address = {Princeton NJ: Educational Testing Service}, author = {Chang, Hua-Hua and Ying, Z.} } @booklet {1615, title = {Revising item responses in computerized adaptive testing: A comparison of three models (RR-96-12)}, year = {1996}, note = {$\#$ST96-12 [See APM paper 1997).}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @book {1680, title = {Robustness of a unidimensional computerized testing mastery procedure with multidimensional testing data}, year = {1996}, note = {{PDF file, 1.838 MB}}, address = {Unpublished doctoral dissertation, University of Iowa, Iowa City IA}, author = {Lau, CA} } @conference {1166, title = {A search procedure to determine sets of decision points when using testlet-based Bayesian sequential testing procedures}, booktitle = {Paper presented at the annual meeting of National Council on Measurement in Education}, year = {1996}, address = {New York}, author = {Smith, R. 
and Lewis, C.} } @booklet {1454, title = {Some practical examples of computerized adaptive sequential testing (Internal Report)}, year = {1996}, address = {Philadelphia: National Board of Medical Examiners}, author = {Luecht, RM and Nungester, R. J.} } @conference {1253, title = {Strategies for managing item pools to maximize item security}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1996}, address = {San Diego}, author = {Way, W. D. and A Zara and Leahy, J.} } @inbook {299, title = {Test adaptativos informatizados [Computerized adaptive testing]}, booktitle = {Psicometr{\'\i}a}, year = {1996}, publisher = {Universitas}, organization = {Universitas}, address = {Madrid, UNED}, author = {Olea, J. and Ponsoda, V.} } @conference {1133, title = {A Type I error rate study of a modified SIBTEST DIF procedure with potential application to computerized adaptive tests}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1996}, address = {Alberta Canada}, author = {Roussos, L.} } @book {1644, title = {Users manual for the MicroCAT testing system, Version 3.5}, year = {1996}, note = {.}, address = {St Paul MN: Assessment Systems Corporation}, author = {Assessment-Systems-Corporation.} } @conference {991, title = {Using unidimensional IRT models for dichotomous classification via CAT with multidimensional data}, booktitle = {Poster session presented at the annual meeting of the American Educational Research Association}, year = {1996}, address = {Boston MA}, author = {Lau, CA and Abdel-Fattah, A. A. and Spray, J. A.} } @conference {918, title = {Utility of Fisher information, global information and different starting abilities in mini CAT}, booktitle = {Paper presented at the Annual Meeting of the National Council on Measurement in Education}, year = {1996}, note = {$\#$FA96-01}, address = {New York NY}, author = {Fan, M. and Hsu, Y.} } @article {267, title = {Validity of item selection: A comparison of automated computerized adaptive and manual paper and pencil examinations}, journal = {Teaching and Learning in Medicine}, volume = {8}, number = {3}, year = {1996}, pages = {152-157}, author = {Lunz, M. E. and Deville, C. W.} } @article {122, title = {Assessment of scaled score consistency in adaptive testing from a multidimensional item response theory perspective}, journal = {Dissertation Abstracts International: Section B: the Sciences \& Engineering}, volume = {55}, number = {12-B}, year = {1995}, pages = {5598}, abstract = {The purpose of this study was twofold: (a) to examine whether the unidimensional adaptive testing estimates are comparable for different ability levels of examinees when the true examinee-item interaction is correctly modeled using a compensatory multidimensional item response theory (MIRT) model; and (b) to investigate the effects of adaptive testing estimation when the procedure of item selection of computerized adaptive testing (CAT) is controlled by either content-balancing or selecting the most informative item in a user specified direction at the current estimate of unidimensional ability. A series of Monte Carlo simulations were conducted in this study. Deviation from the reference composite angle was used as an index of the theta1,theta2-composite consistency across the different levels of unidimensional CAT estimates. 
In addition, the effect of the content-balancing item selection procedure and the fixed-direction item selection procedure were compared across the different ability levels. The characteristics of item selection, test information and the relationship between unidimensional and multidimensional models were also investigated. In addition to employing statistical analysis to examine the robustness of the CAT procedure to violations of unidimensionality, this research also included graphical analyses to present the results. The results were summarized as follows: (a) the reference angles for the no-control-item-selection method were disparate across the unidimensional ability groups; (b) the unidimensional CAT estimates from the content-balancing item selection method did not offer much improvement; (c) the fixed-direction-item selection method did provide greater consistency for the unidimensional CAT estimates across the different levels of ability; (d) and, increasing the CAT test length did not provide greater score scale consistency. Based on the results of this study, the following conclusions were drawn: (a) without any controlling (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Fan, Miechu} } @conference {1165, title = {A Bayesian computerized mastery model with multiple cut scores}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco CA}, author = {Smith, R. L. and Lewis, C.} } @conference {1208, title = {Bayesian item selection in adaptive testing}, booktitle = {Paper presented at the Annual Meeting of the Psychometric Society}, year = {1995}, address = {Minneapolis MN}, author = {van der Linden, W. J.} } @article {341, title = {Comparability and validity of computerized adaptive testing with the MMPI-2}, journal = {Journal of Personality Assessment}, volume = {65}, number = {2}, year = {1995}, note = {Roper, B LBen-Porath, Y SButcher, J NUnited StatesJournal of personality assessmentJ Pers Assess. 1995 Oct;65(2):358-71.}, month = {Oct}, pages = {358-71}, edition = {1995/10/01}, abstract = {The comparability and validity of a computerized adaptive (CA) Minnesota Multiphasic Personality Inventory-2 (MMPI-2) were assessed in a sample of 571 undergraduate college students. The CA MMPI-2 administered adaptively Scales L, F, the 10 clinical scales, and the 15 content scales, utilizing the countdown method (Butcher, Keller, \& Bacon, 1985). All subjects completed the MMPI-2 twice, with three experimental conditions: booklet test-retest, booklet-CA, and conventional computerized (CC)-CA. Profiles across administration modalities show a high degree of similarity, providing evidence for the comparability of the three forms. Correlations between MMPI-2 scales and other psychometric measures (Beck Depression Inventory; Symptom Checklist-Revised; State-Trait Anxiety and Anger Scales; and the Anger Expression Scale) support the validity of the CA MMPI-2. Substantial item savings may be realized with the implementation of the countdown procedure.}, isbn = {0022-3891 (Print)}, author = {Roper, B. L. and Ben-Porath, Y. S. and Butcher, J. N.} } @conference {913, title = {Comparability studies for the GRE CAT General Test and the NCLEX using CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Eignor, D. R. 
and Schaffer, G.A.} } @conference {1008, title = {A comparison of classification agreement between adaptive and full-length test under the 1-PL and 2-PL models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, note = {(cited in $\#$RE98311)}, address = {San Francisco CA}, author = {Lewis, M. J. and Subhiyah, R. G. and Morrison, C. A.} } @conference {858, title = {A comparison of gender differences on paper-and-pencil and computer-adaptive versions of the Graduate Record Examination}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco CA}, author = {Bridgeman, B. and Schaeffer, G. A.} } @article {695, title = {A comparison of item selection routines in linear and adaptive tests}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1995}, pages = {227-242}, author = {Schnipke, D. L., and Green, B. F.} } @conference {1249, title = {A comparison of two IRT-based models for computerized mastery testing when item parameter estimates are uncertain}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco}, author = {Way, W. D. and Lewis, C. and Smith, R. L.} } @conference {1070, title = {Computer adaptive testing in a medical licensure setting: A comparison of outcomes under the one- and two- parameter logistic models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco}, author = {Morrison, C. A. and Nungester, R. J.} } @article {690, title = {Computer-adaptive testing: A new breed of assessment}, journal = {Journal of the American Dietetic Association}, volume = {95}, year = {1995}, pages = {1326-1327}, author = {Ruiz, B. and Fitz, P. A. and Lewis, C. and Reidy, C.} } @article {688, title = {Computer-adaptive testing: A new breed of assessment}, journal = {Journal of the American Dietetic Association}, volume = {95}, year = {1995}, pages = {1326-1327}, author = {Ruiz, B. and Fitz, P. A. and Lewis, C. and Reidy, C.} } @article {616, title = {Computer-adaptive testing: CAT: A Bayesian maximum-falsification approach}, journal = {Rasch Measurement Transactions}, volume = {9}, year = {1995}, pages = {412}, author = {Linacre, J. M.} } @book {1646, title = {Computerized adaptive attitude testing using the partial credit model}, year = {1995}, address = {Dissertation Abstracts International-A, 55(7-A), 1922 (UMI No. AAM9430378)}, author = {Baek, S. G.} } @article {263, title = {Computerized adaptive testing: Tracking candidate response patterns}, journal = {Journal of Educational Computing Research}, volume = {13}, number = {2}, year = {1995}, note = {Baywood Publishing, US}, pages = {151-162}, abstract = {Tracked the effect of candidate response patterns on a computerized adaptive test. Data were from a certification examination in laboratory science administered in 1992 to 155 candidates, using a computerized adaptive algorithm. The 90-item certification examination was divided into 9 units of 10 items each to track the pattern of initial responses and response alterations on ability estimates and test precision across the 9 test units. The precision of the test was affected most by response alterations during early segments of the test. 
While candidates generally benefited from altering responses, individual candidates showed different patterns of response alterations across test segments. Test precision was minimally affected, suggesting that the tailoring of computerized adaptive testing is minimally affected by response alterations. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @article {104, title = {Computerized adaptive testing with polytomous items}, journal = {Applied Psychological Measurement}, volume = {19}, year = {1995}, pages = {5{\textendash}22.}, abstract = {Discusses polytomous item response theory models and the research that has been conducted to investigate a variety of possible operational procedures (item bank, item selection, trait estimation, stopping rule) for polytomous model-based computerized adaptive testing (PCAT). Studies are reviewed that compared PCAT systems based on competing item response theory models that are appropriate for the same measurement objective, as well as applications of PCAT in marketing and educational psychology. Directions for future research using PCAT are suggested.}, author = {Dodd, B. G. and De Ayala, R. J., and Koch, W. R.} } @article {2031, title = {Computerized Adaptive Testing With Polytomous Items}, journal = {Applied Psychological Measurement}, volume = {19}, year = {1995}, pages = {5-22}, author = {Dodd, B. G. and De Ayala, R. J. and Koch. W.R.,} } @inbook {1920, title = {Computerized testing for licensure}, year = {1995}, address = {J. Impara (ed.), Licensure testing: Purposes, procedures, and Practices (pp. 291-320). Lincoln NE: Buros Institute of Mental Measurements.}, author = {Vale, C. D.} } @booklet {1621, title = {Controlling item exposure conditional on ability in computerized adaptive testing (Research Report 95-25)}, year = {1995}, note = {$\#$ST95-25; also see $\#$ST98057}, address = {Princeton NJ: Educational Testing Service.}, author = {Stocking, M. L. and Lewis, C.} } @conference {927, title = {Does cheating on CAT pay: Not}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, note = {ERIC ED 392 844}, address = {San Francisco}, author = {Gershon, R. C. and Bergstrom, B.} } @conference {916, title = {The effect of ability estimation for polytomous CAT in different item selection procedures}, booktitle = {Paper presented at the Annual meeting of the Psychometric Society}, year = {1995}, address = {Minneapolis MN}, author = {Fan, M. and Hsu, Y.} } @conference {825, title = {The effect of model misspecification on classification decisions made using a computerized test: UIRT versus MIRT}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1995}, note = {$\#$AB95-01}, address = {Minneapolis MN}, author = {Abdel-Fattah, A. A. and Lau, C.-M. A.} } @conference {1174, title = {The effect of model misspecification on classification decisions made using a computerized test: 3-PLM vs. 1PLM (and UIRT versus MIRT)}, booktitle = {Paper presented at the Annual Meeting of the Psychometric Society}, year = {1995}, note = {$\#$SP95-01}, address = {Minneapolis, MN}, author = {Spray, J. A. and Kalohn, J.C. and Schulz, M. and Fleer, P. 
Jr.} } @conference {882, title = {The effect of population distribution and methods of theta estimation on CAT using the rating scale model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco}, author = {Chen, S. and Hou, L. and Fitzpatrick, S. J. and Dodd, B. G.} } @article {2126, title = {Effect of Rasch calibration on ability and DIF estimation in computer-adaptive tests}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1995}, pages = {341-363}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @article {685, title = {Effects and underlying mechanisms of self-adapted testing}, journal = {Journal of Educational Psychology}, volume = {87}, year = {1995}, pages = {103-116}, author = {Rocklin, T. R. and O{\textquoteright}Donnell, A. M. and Holst, P. M.} } @conference {1146, title = {The effects of item compromise on computerized adaptive test scores}, booktitle = {Paper presented at the meeting of the Society for Industrial and Organizational Psychology}, year = {1995}, address = {Orlando, FL}, author = {Segall, D. O.} } @book {1694, title = {El control de la exposici{\'o}n de los items en tests adaptativos informatizados [Item exposure control in computerized adaptive tests]}, year = {1995}, address = {Unpublished master{\textquoteright}s dissertation, Universidad Aut{\'o}noma de Madrid, Spain}, author = {Revuelta, J.} } @conference {1036, title = {Equating computerized adaptive certification examinations: The Board of Registry series of studies}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @conference {1145, title = {Equating the CAT-ASVAB: Experiences and lessons learned}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Segall, D. O.} } @conference {1153, title = {Equating the CAT-ASVAB: Issues and approach}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco}, author = {Segall, D. O. and Carter, G.} } @conference {1051, title = {Equating the computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education. San Francisco}, year = {1995}, address = {CA}, author = {J. R. McBride} } @conference {1190, title = {Estimation of item difficulty from restricted CAT calibration samples}, booktitle = {Paper presented at the annual conference of the National Council on Measurement in Education in San Francisco.}, year = {1995}, author = {Sykes, R. and Ito, K.} } @booklet {1405, title = {An evaluation of alternative concepts for administering the Armed Services Vocational Aptitude Battery to applicants for enlistment}, year = {1995}, address = {DMDC Technical Report 95-013. Monterey, CA: Personnel Testing Division, Defense Manpower Data Center}, author = {Hogan, P.F. and J. R. McBride and Curran, L. T.} } @inbook {1735, title = {From adaptive testing to automated scoring of architectural simulations}, year = {1995}, address = {L. E. Mancall and P. G. Bashook (Eds.), Assessing clinical reasoning: The oral examination and alternative methods (pp. 115-130). Evanston IL: The American Board of Medical Specialties.}, author = {Bejar, I.
I.} } @conference {869, title = {A global information approach to computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco CA}, author = {Chang, Hua-Hua} } @book {1641, title = {Guidelines for computer-adaptive test development and use in education}, year = {1995}, address = {Washington DC: Author}, author = {American-Council-on-Education.} } @inbook {1948, title = {Improving individual differences measurement with item response theory and computerized adaptive testing}, year = {1995}, address = {D. Lubinski and R. V. Dawis (Eds.), Assessing individual differences in human behavior: New concepts, methods, and findings (pp. 49-79). Palo Alto CA: Davies-Black Publishing.}, author = {Weiss, D. J.} } @inbook {1823, title = {Individualized testing in the classroom}, year = {1995}, address = {Anderson, L.W. (Ed.), International Encyclopedia of Teaching and Teacher Education. Oxford, New York, Tokyo: Elsevier Science 295-299.}, author = {Linacre, J. M.} } @conference {966, title = {The influence of examinee test-taking behavior motivation in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, note = {(ERIC No. ED392839)}, address = {San Francisco CA}, author = {Kim, J. and McLean, J. E.} } @booklet {1595, title = {The introduction and comparability of the computer-adaptive GRE General Test (GRE Board Professional Report 88-08ap; Educational Testing Service Research Report 95-20)}, year = {1995}, address = {Princeton NJ: Educational Testing Service}, author = {Schaeffer, G. A. and Steffen, M. Golub-Smith, M. L. and Mills, C. N. and Durso, R.} } @conference {945, title = {An investigation of item calibration procedures for a computerized licensure examination}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1995}, address = {San Francisco, CA}, author = {Haynie, K.A. and Way, W. D.} } @article {229, title = {An investigation of procedures for computerized adaptive testing using the successive intervals Rasch model}, journal = {Educational and Psychological Measurement}, volume = {55}, number = {6}, year = {1995}, pages = {976-990.}, author = {Koch, W. R. and Dodd, B. G.} } @book {1653, title = {Item equivalence from paper-and-pencil to computer adaptive testing}, year = {1995}, note = {$\#$CH95-01}, address = {Unpublished doctoral dissertation, University of Chicago}, author = {Chae, S.} } @conference {1069, title = {Item exposure rates for unconstrained and content-balanced computerized adaptive tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco CA}, author = {Morrison, C. and Subhiyah, R,, and Nungester, R.} } @conference {1262, title = {Item review and answer changing in computerized adaptive tests}, booktitle = {Paper presented at the Third European Conference on Psychological Assessment}, year = {1995}, address = {Trier, Germany}, author = {Wise, S. L.} } @article {575, title = {Item times in computerized testing: A new differential information}, journal = {European Journal of Psychological Assessment}, volume = {11 (Suppl. 1)}, year = {1995}, pages = {108-109}, author = {Hornke, L. 
F.} } @conference {892, title = {New algorithms for item selection and exposure control with computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1995}, address = {San Francisco CA}, author = {Davey, T. and Parshall, C. G.} } @conference {1198, title = {New item exposure control algorithms for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1995}, address = {Minneapolis MN}, author = {Thomasson, G. L.} } @booklet {1620, title = {A new method of controlling item exposure in computerized adaptive testing (Research Report 95-25)}, year = {1995}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L. and Lewis, C.} } @booklet {1477, title = {Practical issues in large-scale high-stakes computerized adaptive testing (Research Report 95-23)}, year = {1995}, note = {$\#$MI95-23}, address = {Princeton, NJ: Educational Testing Service.}, author = {Mills, C. N. and Stocking, M. L.} } @article {776, title = {Precision and differential item functioning on a testlet-based test: The 1991 Law School Admissions Test as an example}, journal = {Applied Measurement in Education}, volume = {8}, year = {1995}, pages = {157-187}, author = {Wainer, H.,} } @conference {1243, title = {Precision of ability estimation methods in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1995}, note = {See APM article below.)}, address = {Minneapolis}, author = {Wang, T. and Vispoel, W. P.} } @book {1716, title = {The precision of ability estimation methods in computerized adaptive testing}, year = {1995}, note = {.}, address = {Unpublished doctoral dissertation, University of Iowa, Iowa City (UM No. 9945102)}, author = {Wang, T.} } @inbook {1757, title = {Prerequisite relationships for the adaptive assessment of knowledge}, year = {1995}, address = {Greer, J. (Ed.) Proceedings of AIED{\textquoteright}95, 7th World Conference on Artificial Intelligence in Education, Washington, DC, AACE 43-50.}, author = {Dowling, C. E. and Kaluscha, R.} } @conference {1286, title = {Recursive maximum likelihood estimation, sequential designs, and computerized adaptive testing}, booktitle = {Paper presented at the Eleventh Workshop on Item Response Theory}, year = {1995}, address = {University of Twente, the Netherlands}, author = {Ying, Z. and Chang, Hua-Hua} } @article {438, title = {Review of the book Computerized Adaptive Testing: A Primer}, journal = {Psychometrika}, volume = {4?}, year = {1995}, pages = {615-620}, author = {Andrich, D.} } @article {579, title = {Shortfall of questions curbs use of computerized graduate exam}, journal = {The Chronicle of Higher Education}, year = {1995}, chapter = {A23.}, author = {Jacobson, R. L.} } @booklet {1452, title = {Some alternative CAT item selection heuristics (Internal report)}, year = {1995}, address = {Philadelphia PA: National Board of Medical Examiners}, author = {Luecht, RM} } @conference {1154, title = {Some new methods for content balancing adaptive tests}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1995}, address = {Minneapolis MN}, author = {Segall, D. O. and Davey, T. 
C.} } @article {144, title = {A study of psychologically optimal level of item difficulty}, journal = {Shinrigaku Kenkyu}, volume = {65}, number = {6}, year = {1995}, note = {Fujimori, SClinical TrialControlled Clinical TrialEnglish AbstractJapanShinrigaku kenkyu : The Japanese journal of psychologyShinrigaku Kenkyu. 1995 Feb;65(6):446-53.}, month = {Feb}, pages = {446-53}, edition = {1995/02/01}, abstract = {For the purpose of selecting items in a test, this study presented a viewpoint of psychologically optimal difficulty level, as well as measurement efficiency, of items. A paper-and-pencil test (P \& P) composed of hard, moderate and easy subtests was administered to 298 students at a university. A computerized adaptive test (CAT) was also administered to 79 students. The items of both tests were selected from Shiba{\textquoteright}s Word Meaning Comprehension Test, for which the estimates of parameters of two-parameter item response model were available. The results of P \& P research showed that the psychologically optimal success level would be such that the proportion of right answers is somewhere between .75 and .85. A similar result was obtained from CAT research, where the proportion of about .8 might be desirable. Traditionally a success rate of .5 has been recommended in adaptive testing. In this study, however, it was suggested that the items of such level would be too hard psychologically for many examinees.}, keywords = {*Adaptation, Psychological, *Psychological Tests, Adult, Female, Humans, Male}, isbn = {0021-5236 (Print)0021-5236 (Linking)}, author = {Fujimori, S.} } @conference {1076, title = {Tests adaptativos y autoadaptados informatizados: Efectos en la ansiedad y en la precisi{\'o}n de las estimaciones [SATs and CATs: Effects on anxiety and estimate precision]}, booktitle = {Paper presented at the Fourth Symposium de Metodolog{\'\i}a de las Ciencias del Comportamiento}, year = {1995}, address = {Murcia, Spain}, author = {Olea, J. and Ponsoda, V. and Wise, S. L.} } @article {194, title = {Theoretical results and item selection from multidimensional item bank in the Mokken IRT model for polytomous items}, journal = {Applied Psychological Measurement}, volume = {19}, number = {4}, year = {1995}, pages = {337{\textendash}352}, author = {Hemker, B. T. and Sijtsma, K. and Molenaar, I. W.} } @booklet {1408, title = {Using simulation to select an adaptive testing strategy: An item bank evaluation program}, year = {1995}, address = {Unpublished manuscript, University of Pittsburgh}, author = {Hsu, T. C. and Tseng, F. L.} } @article {666, title = {ADTEST: A computer-adaptive test based on the maximum information principle}, journal = {Educational and Psychological Measurement}, volume = {54}, year = {1994}, pages = {680-686}, author = {Ponsoda, V. and Olea, J. and Revuelta, J.} } @book {1670, title = {CAT software system [computer program]}, year = {1994}, address = {Chicago IL: Computer Adaptive Technologies}, author = {Gershon, R. C.} } @booklet {1600, title = {CAT-GATB simulation studies}, year = {1994}, address = {San Diego CA: Navy Personnel Research and Development Center}, author = {Segall, D. O.} } @conference {1270, title = {Comparing computerized adaptive and self-adapted tests: The influence of examinee achievement locus of control}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans LA}, author = {Wise, S. L. and Roos, L. L. and Plake, B.
S.} } @article {2030, title = {A Comparison of Item Calibration Media in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {18}, number = {197-204}, year = {1994}, author = {Hetter, R. D. and Segall, D. O. and Bloxom, B. M.} } @article {570, title = {A comparison of item calibration media in computerized adaptive tests}, journal = {Applied Psychological Measurement}, volume = {18}, year = {1994}, pages = {197-204}, author = {Hetter, R. D. and Segall, D. O. and Bloxom, B. M.} } @article {629, title = {Computer adaptive testing}, journal = {International journal of Educational Research}, volume = {6}, year = {1994}, pages = {623-634}, author = {Lunz, M. E. and Bergstrom, Betty A. and Gershon, R. C.} } @article {493, title = {Computer adaptive testing: A shift in the evaluation paradigm}, journal = {Educational Technology Systems}, volume = {22 (3)}, year = {1994}, pages = {213-224}, author = {Carlson, R.} } @article {527, title = {Computer adaptive testing: Assessment of the future}, journal = {Curriculum/Technology Quarterly}, volume = {4 (2)}, year = {1994}, pages = {1-3}, author = {Diones, R. and Everson, H.} } @conference {846, title = {Computerized adaptive testing exploring examinee response time using hierarchical linear modeling}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1994}, note = {ERIC No. ED 400 286).}, address = {New Orleans LA}, author = {Bergstrom, B. and Gershon, R. C.} } @article {480, title = {Computerized adaptive testing for licensure and certification}, journal = {CLEAR Exam Review}, volume = {Winter 1994}, year = {1994}, pages = {25-27}, author = {Bergstrom, Betty A. and Gershon, R. C.} } @article {705, title = {Computerized adaptive testing: Revolutionizing academic assessment}, journal = {Community College Journal}, volume = {65 (1)}, year = {1994}, pages = {32-35}, author = {Smittle, P.} } @booklet {1365, title = {Computerized mastery testing using fuzzy set decision theory (Research Report 94-37)}, year = {1994}, address = {Princeton NJ: Educational Testing Service}, author = {Du, Y. and Lewis, C. and Pashley, P. J.} } @booklet {1489, title = {Computerized Testing (Research Report 94-22).}, year = {1994}, address = {Princeton NJ: Educational Testing Service}, author = {Oltman, P. K.} } @article {763, title = {Computerized-adaptive and self-adapted music-listening tests: Features and motivational benefits}, journal = {Applied Measurement in Education}, volume = {7}, year = {1994}, pages = {25-51}, author = {Vispoel, W. P., and Coffman, D. D.} } @booklet {1591, title = {DIF analysis for pretest items in computer-adaptive testing (Educational Testing Service Research Rep No RR 94-33)}, year = {1994}, note = {$\#$ZW94-33}, address = {Princeton NJ: Educational Testing Service.}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @conference {1049, title = {Early psychometric research in the CAT-ASVAB Project}, booktitle = {Paper presented at the 102nd Annual Convention of the American Psychological Association. Los Angeles}, year = {1994}, address = {CA}, author = {J. R. McBride} } @conference {955, title = {The effect of restricting ability distributions in the estimation of item difficulties: Implications for a CAT implementation}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans}, author = {Ito, K. 
and Sykes, R.C.} } @article {630, title = {The effect of review on the psychometric characteristics of computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {7(3)}, year = {1994}, pages = {211-222}, author = {Lunz, M. E. and Stone, G. E.} } @article {717, title = {The effect of review on the psychometric characteristics of computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {7}, year = {1994}, pages = {211-222}, author = {Stone, G. E. and Lunz, M. E.} } @article {376, title = {The effect of review on the psychometric characterstics of computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {7}, number = {3}, year = {1994}, note = {Lawrence Erlbaum, US}, pages = {211-222}, abstract = {Explored the effect of reviewing items and altering responses on examinee ability estimates, test precision, test information, decision confidence, and pass/fail status for computerized adaptive tests. Two different populations of examinees took different computerized certification examinations. For purposes of analysis, each population was divided into 3 ability groups (high, medium, and low). Ability measures before and after review were highly correlated, but slightly lower decision confidence was found after review. Pass/fail status was most affected for examinees with estimates close to the pass point. Decisions remained the same for 94\% of the examinees. Test precision is only slightly affected by review, and the average information loss can be recovered by the addition of one item. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Stone, G. E. and Lunz, M. E.} } @book {1635, title = {Effects of computerized adaptive test anxiety on nursing licensure examinations}, year = {1994}, address = {Dissertation Abstracts International, A (Humanities and Social Sciences), 54 (9-A), 3410}, author = {Arrowwood, V. E.} } @conference {944, title = {The effects of item pool depth on the accuracy of pass/fail decisions for NCLEX using CAT}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans}, author = {Haynie, K.A. and Way, W. D.} } @article {262, title = {An empirical study of computerized adaptive test administration conditions}, journal = {Journal of Educational Measurement}, volume = {31}, number = {3}, year = {1994}, month = {Fal}, pages = {251-263}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @inbook {25, title = {The equivalence of Rasch item calibrations and ability estimates across modes of administration}, booktitle = {Objective measurement: Theory into practice}, volume = {2}, year = {1994}, pages = {122-128}, publisher = {Ablex Publishing Co.}, organization = {Ablex Publishing Co.}, address = {Norwood, N.J. USA}, keywords = {computerized adaptive testing}, author = {Bergstrom, Betty A. and Lunz, M. E.} } @conference {914, title = {Establishing the comparability of the NCLEX using CAT with traditional NCLEX examinations}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans, LA}, author = {Eignor, D. R. and Way, W. D. and Amoss, K.E.} } @conference {887, title = {Evaluation and implementation of CAT-ASVAB}, booktitle = {Paper presented at the annual meeting of the American Psychological Association}, year = {1994}, address = {Los Angeles}, author = {Curran, L. T. and Wise, L. 
L.} } @book {1692, title = {The exploration of an alternative method for scoring computer adaptive tests}, year = {1994}, address = {Unpublished doctoral dissertation, Lincoln NE: University of Nebraska}, author = {Potenza, M.} } @conference {1026, title = {A few more issues to consider in multidimensional computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1994}, address = {San Francisco}, author = {Luecht, RM} } @article {473, title = {A general approach to algorithmic design of fixed-form tests, adaptive tests, and testlets}, journal = {Applied Psychological Measurement}, volume = {18}, year = {1994}, pages = {141-153}, author = {Berger, M. P. F.} } @article {2029, title = {A General Approach to Algorithmic Design of Fixed-Form Tests, Adaptive Tests, and Testlets}, journal = {Applied Psychological Measurement}, volume = {18}, year = {1994}, pages = {141-153}, author = {Berger, M. P. F.} } @conference {375, title = {The historical developments of fit and its assessment in the computerized adaptive testing environment}, booktitle = {Midwestern Education Research Association annual meeting}, year = {1994}, month = {10/1994}, address = {Chicago, IL USA}, author = {Stone, G. E.} } @article {750, title = {The incomplete equivalence of the paper-and-pencil and computerized versions of the General Aptitude Test Battery}, journal = {Journal of Applied Psychology}, volume = {79}, year = {1994}, pages = {852-859}, author = {Van de Vijver, F. J. R., and Harsveld, M.} } @article {765, title = {Individual differences and test administration procedures: A comparison of fixed-item, computerized adaptive, and self-adapted testing}, journal = {Applied Measurement in Education}, volume = {7}, year = {1994}, pages = {53-79}, author = {Vispoel, W. P. and Rocklin, T. R. and Wang, T.} } @conference {1185, title = {Item calibration considerations: A comparison of item calibrations on written and computerized adaptive examinations}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1994}, address = {New Orleans LA}, author = {Stone, G. E. and Lunz, M. E.} } @booklet {1499, title = {La simulation de mod{\`e}le sur ordinateur en tant que m{\'e}thode de recherche : le cas concret de l{\textquoteright}{\'e}tude de la distribution d{\textquoteright}{\'e}chantillonnage de l{\textquoteright}estimateur du niveau d{\textquoteright}habilet{\'e} en testing adaptatif en fonction de deux r{\`e}gles d{\textquoteright}arr{\^e}t}, year = {1994}, address = {Actes du 6e colloque de l{\textquoteleft}Association pour la recherche au coll{\'e}gial. Montr{\'e}al : Association pour la recherche au coll{\'e}gial, ARC}, author = {Ra{\^\i}che, G.} } @conference {1099, title = {L{\textquoteright}{\'e}valuation nationale individualis{\'e}e et assist{\'e}e par ordinateur [Large scale assessment: Tailored and computerized]}, booktitle = {Qu{\'e}bec: Proceeding of the 14th Congress of the Association qu{\'e}b{\'e}coise de p{\'e}dagogie coll{\'e}giale. Montr{\'e}al: Association qu{\'e}b{\'e}coise de p{\'e}dagogie coll{\'e}giale (AQPC).}, year = {1994}, author = {Ra{\^\i}che, G. 
and B{\'e}land, A.} } @article {221, title = {Monte Carlo simulation comparison of two-stage testing and computerized adaptive testing}, journal = {Dissertation Abstracts International Section A: Humanities \& Social Sciences}, volume = {54}, number = {7-A}, year = {1994}, pages = {2548}, keywords = {computerized adaptive testing}, author = {Kim, H-O.} } @conference {915, title = {Pinpointing PRAXIS I CAT characteristics through simulation procedures}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans, LA}, author = {Eignor, D. R. and Folk, V.G., and Li, M.-Y. and Stocking, M. L.} } @article {669, title = {The psychological impacts of computerized adaptive testing methods}, journal = {Educational Technology}, volume = {34}, year = {1994}, pages = {41-47}, author = {Powell, Z. E.} } @article {812, title = {The relationship between examinee anxiety and preference for self-adapted testing}, journal = {Applied Measurement in Education }, volume = {7}, year = {1994}, pages = {81-91}, author = {Wise, S. L. and Roos, L. L. and Plake, B. S., and Nebelsick-Gullett, L. J.} } @inbook {266, title = {Reliability of alternate computer adaptive tests}, booktitle = {Objective measurement, theory into practice}, volume = {II}, year = {1994}, publisher = {Ablex}, organization = {Ablex}, address = {New Jersey}, author = {Lunz, M. E. and Bergstrom, Betty A. and Wright, B. D.} } @conference {1113, title = {The selection of test items for decision making with a computer adaptive test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1994}, address = {New Orleans LA}, author = {Reckase, M. D. and Spray, J. A.} } @conference {1178, title = {The selection of test items for decision making with a computer adaptive test}, booktitle = {Paper presented at the national meeting of the National Council on Measurement in Education}, year = {1994}, note = {$\#$SP94-01}, address = {New Orleans LA}, author = {Spray, J. A. and Reckase, M. D.} } @article {682, title = {Self-adapted testing}, journal = {Applied Measurement in Education}, volume = {7}, year = {1994}, pages = {3-14}, author = {Rocklin, T. R.} } @booklet {1544, title = {A simple and fast item selection procedure for adaptive testing}, year = {1994}, address = {Research (Report 94-13). University of Twente.}, author = {Veerkamp, W. J. J.} } @article {2028, title = {A Simulation Study of Methods for Assessing Differential Item Functioning in Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {18}, number = {2}, year = {1994}, pages = {121-140}, author = {Zwick, R. and Thayer, D. T. and Wingersky, M.} } @booklet {1563, title = {A simulation study of the Mantel-Haenszel procedure for detecting DIF with the NCLEX using CAT (Technical Report xx-xx)}, year = {1994}, address = {Princeton NJ: Educational Testing Service}, author = {Way, W. D.} } @booklet {1545, title = {Some new item selection criteria for adaptive testing (Research Rep 94-6)}, year = {1994}, address = {Enschede, The Netherlands: University of Twente, Department of Educational Measurement and Data Analysis.}, author = {Veerkamp, W. J. and Berger, M. P. F.} } @booklet {1614, title = {Three practical issues for modern adaptive testing item pools (Research Report 94-5),}, number = {(Research Report 94-5)}, year = {1994}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. 
L.} } @article {807, title = {Understanding self-adapted testing: The perceived control hypothesis}, journal = {Applied Measurement in Education}, volume = {7}, year = {1994}, pages = {15-24}, author = {Wise, S. L.} } @inbook {1870, title = {Utilisation de la simulation en tant que m{\'e}thodologie de recherche [Simulation methodology in research]}, year = {1994}, address = {Association pour la recherche au coll{\'e}gial (Ed.) : L{\textquoteright}en-qu{\^e}te de la cr{\'e}ativit{\'e} [In quest of creativity]. Proceeding of the 6th Congress of the ARC. Montr{\'e}al: Association pour la recherche au coll{\'e}gial (ARC).}, author = {Ra{\^\i}che, G.} } @article {715, title = {The application of an automated item selection method to real data}, journal = {Applied Psychological Measurement}, volume = {17}, year = {1993}, pages = {167-176}, author = {Stocking, M. L. and Swanson, L. and Pearlman, M.} } @article {284, title = {An application of Computerized Adaptive Testing to the Test of English as a Foreign Language}, journal = {Dissertation Abstracts International}, volume = {53}, number = {12-A}, year = {1993}, pages = {4257-4258}, keywords = {computerized adaptive testing}, author = {Moon, O.} } @article {224, title = {Assessing the utility of item response models: computerized adaptive testing}, journal = {Educational Measurement: Issues and Practice}, volume = {12}, number = {1}, year = {1993}, pages = {21-27}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G. and Houser, R.L.} } @booklet {1372, title = {Case studies in computer adaptive test design through simulation (Research Report RR-93-56)}, year = {1993}, note = {$\#$EI93-56 (also presented at the 1993 National Council on Measurement in Education meeting in Atlanta GA)}, address = {Princeton NJ: Educational Testing Service}, author = {Eignor, D. R. and Stocking, M. L. and Way, W. D. and Steffen, M.} } @booklet {1373, title = {Case studies in computerized adaptive test design through simulation (Research Report 93-56)}, year = {1993}, address = {Princeton NJ: Educational Testing Service}, author = {Eignor, D. R. and Way, W. D. and Stocking, M. and Steffen, M.} } @article {339, title = {Comparability and validity of computerized adaptive testing with the MMPI-2}, journal = {Dissertation Abstracts International}, volume = {53}, number = {7-B}, year = {1993}, pages = {3791}, keywords = {computerized adaptive testing}, author = {Roper, B. L.} } @book {1665, title = {A comparison of computer adaptive test administration methods}, year = {1993}, address = {Unpublished doctoral dissertation, University of Chicago}, author = {Dolan, S.} } @conference {1177, title = {Comparison of SPRT and sequential Bayes procedures for classifying examinees into two categories using an adaptive test}, booktitle = {Unpublished manuscript. (}, year = {1993}, author = {Spray, J. A. and Reckase, M. D.} } @article {47, title = {Computer adaptive testing: A comparison of four item selection strategies when used with the golden section search strategy for estimating ability}, journal = {Dissertation Abstracts International}, volume = {54}, number = {5-A}, year = {1993}, pages = {1772}, keywords = {computerized adaptive testing}, author = {Carlson, R. 
D.} } @article {704, title = {Computer adaptive testing: A new era}, journal = {Journal of Developmental Education}, volume = {17 (1)}, year = {1993}, pages = {8-10}, author = {Smittle, P.} } @article {757, title = {Computerized adaptive and fixed-item versions of the ITED Vocabulary test}, journal = {Educational and Psychological Measurement}, volume = {53}, year = {1993}, pages = {779-788}, author = {Vispoel, W. P.} } @conference {1189, title = {Computerized adaptive testing in computer science: assessing student programming abilities}, booktitle = {Proceedings of the twenty-fourth SIGCSE Technical Symposium on Computer Science Education}, year = {1993}, address = {Indianapolis IN}, author = {Syang, A. and Dale, N.B.} } @article {804, title = {Computerized adaptive testing in instructional settings}, journal = {Educational Technology Research and Development}, volume = {41(3)}, year = {1993}, pages = {47-62}, author = {Welch, R. E., and Frick, T.} } @book {1724, title = {Computerized adaptive testing strategies: Golden section search, dichotomous search, and Z-score strategies (Doctoral dissertation, Iowa State University, 1990)}, year = {1993}, address = {Dissertation Abstracts International, 54-03B, 1720}, author = {Xiao, B.} } @article {178, title = {Computerized adaptive testing: the future is upon us}, journal = {Nurs Health Care}, volume = {14}, number = {7}, year = {1993}, note = {Halkitis, P NLeahy, J MUnited statesNursing \& health care : official publication of the National League for NursingNurs Health Care. 1993 Sep;14(7):378-85.}, month = {Sep}, pages = {378-85}, edition = {1993/09/01}, keywords = {*Computer-Assisted Instruction, *Education, Nursing, *Educational Measurement, *Reaction Time, Humans, Pharmacology/education, Psychometrics}, isbn = {0276-5284 (Print)}, author = {Halkitis, P. N. and Leahy, J. M.} } @article {106, title = {Computerized adaptive testing using the partial credit model: Effects of item pool characteristics and different stopping rules}, journal = {Educational and Psychological Measurement}, volume = {53}, number = {1}, year = {1993}, pages = {61-77.}, abstract = {Simulated datasets were used to research the effects of the systematic variation of three major variables on the performance of computerized adaptive testing (CAT) procedures for the partial credit model. The three variables studied were the stopping rule for terminating the CATs, item pool size, and the distribution of the difficulty of the items in the pool. Results indicated that the standard error stopping rule performed better across the variety of CAT conditions than the minimum information stopping rule. In addition it was found that item pools that consisted of as few as 30 items were adequate for CAT provided that the item pool was of medium difficulty. The implications of these findings for implementing CAT systems based on the partial credit model are discussed. }, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @article {533, title = {Computerized mastery testing using fuzzy set decision theory}, journal = {Applied Measurement in Education}, volume = {6}, year = {1993}, note = {(Also Educational Testing Service Research Report 94-37)}, pages = {181-193}, author = {Du, Y. and Lewis, C. and Pashley, P. J.} } @booklet {1613, title = {Controlling item exposure rates in a realistic adaptive testing paradigm (Research Report 93-2)}, year = {1993}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. 
L.} } @booklet {1371, title = {Deriving comparable scores for computer adaptive and conventional tests: An example using the SAT}, number = {(ETS Research Report RR-93-5)}, year = {1993}, note = {$\#$EI93-55 (Also presented at the 1993 National Council on Measurement in Education meeting in Atlanta GA.)}, address = {Princeton NJ: Educational Testing Service}, author = {Eignor, D. R.} } @article {758, title = {The development and evaluation of a computerized adaptive test of tonal memory}, journal = {Journal of Research in Music Education}, volume = {41}, year = {1993}, pages = {111-136}, author = {Vispoel, W. P.} } @conference {1229, title = {The efficiency, reliability, and concurrent validity of adaptive and fixed-item tests of music listening skills}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Vispoel, W. P. and Wang, T. and Bleiler, T.} } @conference {1115, title = {Establishing time limits for the GRE computer adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Reese, C. M.} } @booklet {1593, title = {Field test of a computer-based GRE general test (GRE Board Technical Report 88-8; Educational Testing Service Research Rep No RR 93-07)}, year = {1993}, address = {Princeton NJ: Educational Testing Service.}, author = {Schaeffer, G. A. and Reese, C. M. and Steffen, M. and McKinley, R. L. and Mills, C. N.} } @conference {1228, title = {Individual differences and test administration procedures: A comparison of fixed-item, adaptive, and self-adapted testing}, booktitle = {Paper presented at the annual meeting of the AEARA}, year = {1993}, address = {Atlanta GA}, author = {Vispoel, W. P. and Rocklin, T. R.} } @conference {965, title = {Individual differences in computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the Mid-South Educational Research Association}, year = {1993}, address = {New Orleans LA}, author = {Kim, J.} } @booklet {1594, title = {Introduction of a computer adaptive GRE General test (Research Report 93-57)}, year = {1993}, address = {Princeton NJ: Educational Testing Service}, author = {Schaeffer, G. A. and Steffen, M. and Golub-Smith, M. L.} } @conference {1269, title = {An investigation of restricted self-adapted testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Wise, S. L. and Kingsbury, G. G. and Houser, R.L.} } @booklet {1403, title = {Item Calibration: Medium-of-administration effect on computerized adaptive scores (TR-93-9)}, year = {1993}, address = {Navy Personnel Research and Development Center}, author = {Hetter, R. D. and Bloxom, B. M. and Segall, D. O.} } @booklet {1433, title = {Les tests adaptatifs en langue seconde}, year = {1993}, address = {Communication lors de la 16e session d{\textquoteright}{\'e}tude de l{\textquoteright}ADM{\'E}{\'E} {\`a} Laval. Montr{\'e}al: Association pour le d{\'e}veloppement de la mesure et de l{\textquoteright}{\'e}valuation en {\'e}ducation.}, author = {Laurier, M.} } @article {719, title = {Linking the standard and advanced forms of the Ravens Progressive Matrices in both the paper-and-pencil and computer-adaptive-testing formats}, journal = {Educational and Psychological Measurement}, volume = {53}, year = {1993}, pages = {905-925}, author = {Styles, I. 
and Andrich, D.} } @article {2027, title = {A Method for Severely Constrained Item Selection in Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {17}, year = {1993}, pages = {277-292}, author = {Stocking, M. L. and Swanson, L.} } @article {712, title = {A method for severely constrained item selection in adaptive testing}, journal = {Applied Psychological Measurement}, volume = {17}, year = {1993}, pages = {277-292}, author = {Stocking, M., and Swanson, L.} } @article {713, title = {A model and heuristic for solving very large item selection problems}, journal = {Applied Psychological Measurement}, volume = {17}, year = {1993}, pages = {151-166}, author = {Stocking, M., and Swanson, L.} } @conference {1184, title = {Modern computerized adaptive testing}, booktitle = {Paper presented at the Joint Statistics and Psychometric Seminar}, year = {1993}, address = {Princeton NJ}, author = {Stocking, M. L.} } @conference {967, title = {Monte Carlo simulation comparison of two-stage testing and computerized adaptive testing}, booktitle = {Paper presented at the meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta, GA}, author = {Kim, H. and Plake, B. S.} } @article {217, title = {Moving in a new direction: Computerized adaptive testing (CAT)}, journal = {Nursing Management}, volume = {24}, number = {1}, year = {1993}, note = {Jones-Dickson, CDorsey, DCampbell-Warnock, JFields, FUnited statesNursing managementNurs Manage. 1993 Jan;24(1):80, 82.}, month = {Jan}, pages = {80, 82}, edition = {1993/01/01}, keywords = {*Computers, Accreditation/methods, Educational Measurement/*methods, Licensure, Nursing, United States}, isbn = {0744-6314 (Print)}, author = {Jones-Dickson, C. and Dorsey, D. and Campbell-Warnock, J. and Fields, F.} } @booklet {1605, title = {Multiple-category classification using a sequential probability ratio test (Research report 93-7)}, year = {1993}, note = {$\#$SP93-7}, address = {Iowa City: American College Testing.}, author = {Spray, J. A.} } @article {578, title = {New computer technique seen producing a revolution in testing}, journal = {The Chronicle of Higher Education}, volume = {40}, year = {1993}, pages = {22-23, 26}, edition = {4}, chapter = {.}, author = {Jacobson, R. L.} } @conference {976, title = {A practical examination of the use of free-response questions in computerized adaptive testing}, booktitle = {Paper presented to the annual meeting of the American Educational Research Association: Atlanta GA.}, year = {1993}, note = {{PDF file, 30 KB}}, author = {Kingsbury, G. G. and Houser, R.L.} } @conference {1118, title = {A simulated comparison of testlets and a content balancing procedure for an adaptive certification examination}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta}, author = {Reshetar, R. A. and Norcini, J. J. and Shea, J. A.} } @conference {1119, title = {A simulated comparison of two content balancing and maximum information item selection procedures for an adaptive certification examination}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta}, author = {Reshetar, R. A. and Norcini, J. J. and Shea, J. 
A.} } @booklet {1590, title = {A simulation study of methods for assessing differential item functioning in computer-adaptive tests (Educational Testing Service Research Rep No RR 93-11)}, year = {1993}, address = {Princeton NJ: Educational Testing Service.}, author = {Zwick, R. and Thayer, D. and Wingersky, M.} } @conference {1162, title = {Some initial experiments with adaptive survey designs for structured questionnaires}, booktitle = {Paper presented at the New Methods and Applications in Consumer Research Conference}, year = {1993}, address = {Cambridge MA}, author = {Singh, J.} } @article {775, title = {Some practical considerations when converting a linearly administered test to an adaptive format}, journal = {Educational Measurement: Issues and Practice}, volume = {12 (1)}, year = {1993}, pages = {15-20}, author = {Wainer, H.,} } @conference {1038, title = {Test targeting and precision before and after review on computer-adaptive tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1993}, address = {Atlanta GA}, author = {Lunz, M. E. and Stahl, J. A. and Bergstrom, Betty A.} } @inbook {1819, title = {Un test adaptatif en langue seconde : la perception des apprenants}, year = {1993}, address = {R.Hivon ({\'E}d.),L{\textquoteright}{\'e}valuation des apprentissages. Sherbrooke : {\'E}ditions du CRP.}, author = {Laurier, M.} } @conference {844, title = {Ability measure equivalence of computer adaptive and paper and pencil tests: A research synthesis}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco}, author = {Bergstrom, B.} } @article {27, title = {Altering the level of difficulty in computer adaptive testing}, journal = {Applied Measurement in Education}, volume = {5}, number = {2}, year = {1992}, note = {Lawrence Erlbaum, US}, pages = {137-149}, abstract = {Examines the effect of altering test difficulty on examinee ability measures and test length in a computer adaptive test. The 225 Ss were randomly assigned to 3 test difficulty conditions and given a variable length computer adaptive test. Examinees in the hard, medium, and easy test condition took a test targeted at the 50\%, 60\%, or 70\% probability of correct response. The results show that altering the probability of a correct response does not affect estimation of examinee ability and that taking an easier computer adaptive test only slightly increases the number of items necessary to reach specified levels of precision. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, keywords = {computerized adaptive testing}, author = {Bergstrom, Betty A. and Lunz, M. E. and Gershon, R. C.} } @article {631, title = {The application of latent class models in adaptive testing}, journal = {Psychometrika}, volume = {57}, year = {1992}, pages = {71-88}, author = {Macready, G. B. and Dayton, C. M.} } @conference {849, title = {Assessing existing item bank depth for computer adaptive testing}, booktitle = {ERIC Document No. TM022404}, year = {1992}, author = {Bergstrom, Betty A. and Stahl, J. A.} } @article {648, title = {CAT-ASVAB precision}, journal = {Proceedings of the 34th Annual Conference of the Military Testing Association}, volume = {1}, year = {1992}, pages = {22-26}, author = {Moreno, K. E., and Segall, D. 
O.} } @conference {1288, title = {A comparison of computerized adaptive and paper-and-pencil versions of the national registered nurse licensure examination}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco CA}, author = {A Zara} } @conference {845, title = {Comparison of item targeting strategies for pass/fail adaptive tests}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, note = {(ERIC NO. ED 400 287).}, address = {San Francisco CA}, author = {Bergstrom, B. and Gershon, R. C.} } @book {1701, title = {A comparison of methods for adaptive estimation of a multidimensional trait}, year = {1992}, address = {Unpublished doctoral dissertation, Columbia University}, author = {Tam, S. S.} } @article {810, title = {A comparison of self-adapted and computerized adaptive achievement tests}, journal = {Journal of Educational Measurement}, volume = {29}, year = {1992}, pages = {329-339}, author = {Wise, S. L. and Plake, S. S and Johnson, P. L. and Roos, S. L.} } @article {529, title = {A comparison of the partial credit and graded response models in computerized adaptive testing}, journal = {Applied Measurement in Education}, volume = {5}, year = {1992}, pages = {17-34}, author = {De Ayala, R. J. and Dodd, B. G. and Koch, W. R.} } @article {778, title = {A comparison of the performance of simulated hierarchical and linear testlets}, journal = {Journal of Educational Measurement}, volume = {29}, year = {1992}, pages = {243-251}, author = {Wainer, H., and Kaplan, B. and Lewis, C.} } @book {1647, title = {Computer adaptive versus paper-and-pencil tests}, year = {1992}, address = {Unpublished doctoral dissertation, University of Chicago}, author = {Bergstrom, B.} } @article {487, title = {Computer-based adaptive testing in music research and instruction}, journal = {Psychomusicology}, volume = {10}, year = {1992}, pages = {49-63}, author = {Bowers, D. R.} } @booklet {1374, title = {Computerized adaptive assessment of cognitive abilities among disabled adults}, year = {1992}, address = {ERIC Document No ED301274}, author = {Engdahl, B.} } @article {550, title = {Computerized adaptive mastery tests as expert systems}, journal = {Journal of Educational Computing Research}, volume = {8(2)}, year = {1992}, pages = {187-213}, author = {Frick, T. W.} } @article {141, title = {Computerized adaptive mastery tests as expert systems}, journal = {Journal of Educational Computing Research}, volume = {8}, number = {2}, year = {1992}, pages = {187-213.}, author = {Frick, T. W.} } @article {126, title = {Computerized adaptive testing for NCLEX-PN}, journal = {Journal of Practical Nursing}, volume = {42}, number = {2}, year = {1992}, note = {Fields, F AUnited statesThe Journal of practical nursingJ Pract Nurs. 1992 Jun;42(2):8-10.}, month = {Jun}, pages = {8-10}, edition = {1992/06/01}, keywords = {*Licensure, *Programmed Instruction, Educational Measurement/*methods, Humans, Nursing, Practical/*education}, isbn = {0022-3867 (Print)}, author = {Fields, F. A.} } @article {118, title = {Computerized adaptive testing: Its potential substantive contribution to psychological research and assessment}, journal = {Current Directions in Psychological Science}, volume = {1}, number = {4}, year = {1992}, pages = {129-133}, author = {Embretson, S. 
E.} } @article {762, title = {Computerized adaptive testing of music-related skills}, journal = {Bulletin of the Council for Research in Music Education}, volume = {112}, year = {1992}, pages = {29-49}, author = {Vispoel, W. P., and Coffman, D. D.} } @article {610, title = {Computerized adaptive testing with different groups}, journal = {Educational Measurement: Issues and Practice}, volume = {11 (2)}, year = {1992}, pages = {23-27}, author = {Legg, S. M., and Buhr, D. C.} } @conference {842, title = {Computerized adaptive testing with the MMPI-2: Reliability, validity, and comparability to paper and pencil administration}, booktitle = {Paper presented at the 27th Annual Symposium on Recent Developments in the MMPI/MMPI-2}, year = {1992}, address = {Minneapolis MN}, author = {Ben-Porath, Y. S. and Roper, B. L.} } @article {697, title = {Computerized mastery testing with nonequivalent testlets}, journal = {Applied Psychological Measurement}, volume = {16}, year = {1992}, pages = {65-76}, author = {Sheehan, K., and Lewis, C.} } @article {2025, title = {Computerized Mastery Testing With Nonequivalent Testlets}, journal = {Applied Psychological Measurement}, volume = {16}, year = {1992}, pages = {65-76}, author = {Sheehan, K. and Lewis, C.} } @article {24, title = {Confidence in pass/fail decisions for computer adaptive and paper and pencil examinations}, journal = {Evaluation and the Health Professions}, volume = {15}, number = {4}, year = {1992}, note = {Sage Publications, US}, pages = {453-464}, abstract = {Compared the level of confidence in pass/fail decisions obtained with computer adaptive tests (CADTs) and pencil-and-paper tests (PPTs). 600 medical technology students took a variable-length CADT and 2 fixed-length PPTs. The CADT was stopped when the examinee ability estimate was either 1.3 times the standard error of measurement above or below the pass/fail point or when a maximum test length was reached. Results show that greater confidence in the accuracy of the pass/fail decisions was obtained for more examinees when the CADT implemented a 90\% confidence stopping rule than with PPTs of comparable test length. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Bergstrom, Betty A. and Lunz, M. E.} } @article {478, title = {Confidence in pass/fail decisions for computer adaptive and paper and pencil examinations}, journal = {Evaluation and The Health Professions}, volume = {15(4)}, year = {1992}, pages = {435-464}, author = {Bergstrom, Betty A.} } @article {475, title = {Confidence in pass/fail decisions for computer adaptive and paper and pencil examinations}, journal = {Evaluation and the Health Professions}, volume = {15}, year = {1992}, pages = {435-464}, author = {Bergstrom, Betty A.} } @article {100, title = {The development and evaluation of a system for computerized adaptive testing}, journal = {Dissertation Abstracts International}, volume = {52}, number = {12-A}, year = {1992}, pages = {4304}, keywords = {computerized adaptive testing}, author = {de la Torre Sanchez, R.} } @inbook {1849, title = {The development of alternative operational concepts}, year = {1992}, address = {Proceedings of the 34th Annual Conference of the Military Testing Association. San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride and Curran, L. 
T.} } @conference {1295, title = {Differential item functioning analysis for computer-adaptive tests and other IRT-scored measures}, booktitle = {Paper presented at the annual meeting of the Military Testing Association}, year = {1992}, address = {San Diego CA}, author = {Zwick, R.} } @article {265, title = {The effect of review on student ability and test efficiency for computerized adaptive tests}, journal = {Applied Psychological Measurement}, volume = {16}, number = {1}, year = {1992}, note = {Sage Publications, US}, pages = {33-40}, abstract = {220 students were randomly assigned to a review condition for a medical technology test; their test instructions indicated that each item must be answered when presented, but that the responses could be reviewed and altered at the end of the test. A sample of 492 students did not have the opportunity to review and alter responses. Within the review condition, examinee ability estimates before and after review were correlated .98. The average efficiency of the test was decreased by 1\% after review. Approximately 32\% of the examinees improved their ability estimates after review but did not change their pass/fail status. Disallowing review on adaptive tests administered under these rules is not supported by these data. (PsycINFO Database Record (c) 2002 APA, all rights reserved).}, author = {Lunz, M. E. and Bergstrom, Betty A. and Wright, Benjamin D.} } @article {2024, title = {The Effect of Review on Student Ability and Test Efficiency for Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {16}, year = {1992}, pages = {33-40}, author = {Lunz, M. E. and Bergstrom, B. A. and Wright, B. D.} } @conference {950, title = {Effects of feedback during self-adapted testing on estimates of ability}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco}, author = {Holst, P. M. and O{\textquoteright}Donnell, A. M. and Rocklin, T. R.} } @conference {1129, title = {The effects of feedback in computerized adaptive and self-adapted tests}, booktitle = {Paper presented at the annual meeting of the NCME}, year = {1992}, address = {San Francisco}, author = {Roos, L. L. and Plake, B. S. and Wise, S. L.} } @conference {981, title = {Estimation of ability level by using only observable quantities in adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {Chicago}, author = {Kirisci, L.} } @inbook {1850, title = {Evaluation of alternative operational concepts}, year = {1992}, address = {Proceedings of the 34th Annual Conference of the Military Testing Association. San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride and Hogan, P.F.} } @booklet {1559, title = {A general Bayesian model for testlets: theory and applications (Research Report 92-21; GRE Board Professional Report No 99-01P)}, year = {1992}, address = {Princeton NJ: Educational Testing Service.}, author = {Wang, X. and Bradlow, E. T. and Wainer, H.} } @conference {1230, title = {How review options and administration mode influence scores on computerized vocabulary tests}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1992}, note = {$\#$VI92-01}, address = {San Francisco CA}, author = {Vispoel, W. P. and Wang, T. and De la Torre, R. and Bleiler, T.
and Dings, J.} } @article {756, title = {Improving the measurement of tonal memory with computerized adaptive tests}, journal = {Psychomusicology}, volume = {11}, year = {1992}, pages = {73-89}, author = {Vispoel, W. P.} } @conference {1236, title = {Incorporating post-administration item response revision into a CAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco}, author = {Wang, M. and Wingersky, M.} } @article {526, title = {The influence of dimensionality on CAT ability estimation}, journal = {Educational and Psychological Measurement}, volume = {52}, year = {1992}, pages = {513-528}, author = {De Ayala, R. J.,} } @article {626, title = {Item selection using an average growth approximation of target information functions}, journal = {Applied Psychological Measurement}, volume = {16}, year = {1992}, pages = {41-51}, author = {Luecht, R. M. and Hirsch, T. M.} } @booklet {1411, title = {The Language Training Division{\textquoteright}s computer adaptive reading proficiency test}, year = {1992}, address = {Provo, UT: Language Training Division, Office of Training and Education}, author = {Janczewski, D. and Lowe, P.} } @article {440, title = {Le testing adaptatif avec interpr{\'e}tation crit{\'e}rielle, une exp{\'e}rience de praticabilit{\'e} du TAM pour l{\textquoteright}{\'e}valuation sommative des apprentissages au Qu{\'e}bec.}, journal = {Mesure et {\'e}valuation en {\'e}ducation}, volume = {15-1 et 2}, year = {1992}, chapter = {10}, author = {Auger, R.}, editor = {Seguin, S. P.} } @book {1709, title = {Manual for the General Scholastic Aptitude Test (Senior) Computerized adaptive test}, year = {1992}, address = {Pretoria: Human Sciences Research Council}, author = {Von Tonder, M. and Claassen, N. C. W.} } @booklet {1625, title = {A method for severely constrained item selection in adaptive testing}, year = {1992}, address = {Educational Testing Service Research Report (RR-92-37): Princeton NJ}, author = {Stocking, M. L. and Swanson, L.} } @conference {1025, title = {Multidimensional CAT simulation study}, year = {1992}, author = {Luecht, R. M.} } @article {2026, title = {The Nominal Response Model in Computerized Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {16}, number = {4}, year = {1992}, pages = {327-343}, author = {De Ayala, R. J.} } @article {525, title = {The nominal response model in computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {16}, year = {1992}, pages = {327-343}, author = {De Ayala, R. J.,} } @conference {1065, title = {Practical considerations for conducting studies of differential item functioning (DIF) in a CAT environment}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco CA}, author = {Miller, T. R.} } @conference {947, title = {Scaling of two-stage adaptive test configurations for achievement testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1992}, address = {New Orleans LA}, author = {Hendrickson, A. B. and Kolen, M.
J.} } @booklet {1553, title = {Some practical considerations when converting a linearly administered test to an adaptive format (Research Report 92-21 or 13?)}, year = {1992}, address = {Princeton NJ: Educational Testing Service}, author = {Wainer, H.,} } @conference {833, title = {Student attitudes toward computer-adaptive test administration}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco CA}, author = {Baghi, H. and Ferrara, S. F. and Gabrys, R.} } @article {318, title = {Test anxiety and test performance under computerized adaptive testing methods}, journal = {Dissertation Abstracts International}, volume = {52}, number = {7-A}, year = {1992}, pages = {2518}, keywords = {computerized adaptive testing}, author = {Powell, Zen-Hsiu E.} } @conference {1095, title = {Test anxiety and test performance under computerized adaptive testing methods}, booktitle = {Richmond IN: Indiana University. (ERIC Document Reproduction Service No. ED 334910 and/or TM018223). Paper presented at the annual meeting of the American Educational Research Association}, year = {1992}, address = {San Francisco CA}, author = {Powell, Z. E.} } @booklet {1364, title = {An analysis of CAT-ASVAB scores in the Marine Corps JPM data (CRM-91-161)}, year = {1991}, address = {Alexandria VA: Center for Naval Analyses}, author = {Divgi, D. R.} } @conference {834, title = {Applications of computer-adaptive testing in Maryland}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago IL}, author = {Baghi, H. and Gabrys, R. and Ferrara, S.} } @booklet {1627, title = {Automatic item selection (AIS) methods in the ETS testing environment (Research Memorandum 91-5)}, year = {1991}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L. and Swanson, L. and Pearlman, M.} } @article {781, title = {Building algebra testlets: A comparison of hierarchical and linear structures}, journal = {Journal of Educational Measurement}, volume = {8}, year = {1991}, pages = {xxx-xxx}, author = {Wainer, H., and Lewis, C. and Kaplan, B. and Braswell, J.} } @booklet {1603, title = {Collected works on the legal aspects of computerized adaptive testing}, year = {1991}, address = {Chicago, IL: National Council of State Boards of Nursing, Inc}, author = {Stenson, H. and Graves, P. and Gardiner, J. and Dally, L.} } @article {340, title = {Comparability of computerized adaptive and conventional testing with the MMPI-2}, journal = {Journal of Personality Assessment}, volume = {57}, number = {2}, year = {1991}, note = {J Pers Assess. 1991 Oct;57(2):278-90.}, month = {Oct}, pages = {278-290}, edition = {1991/01/01}, abstract = {A computerized adaptive version and the standard version of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2) were administered 1 week apart to a sample of 155 college students to assess the comparability of the two versions. The countdown method was used to adaptively administer Scales L, F, the 10 clinical scales, and the 15 new content scales. Profiles across administration modalities show a high degree of similarity, providing evidence for the comparability of computerized adaptive and conventional testing with the MMPI-2. Substantial item savings were found with the adaptive version.
Future directions in the study of adaptive testing with the MMPI-2 are discussed.}, isbn = {0022-3891 (Print)}, author = {Roper, B. L. and Ben-Porath, Y. S. and Butcher, J. N.} } @article {628, title = {Comparability of decisions for computer adaptive and written examinations}, journal = {Journal of Allied Health}, volume = {20}, year = {1991}, pages = {15-23}, author = {Lunz, M. E. and Bergstrom, Betty A.} } @article {235, title = {A comparison of paper-and-pencil, computer-administered, computerized feedback, and computerized adaptive testing methods for classroom achievement testing}, journal = {Dissertation Abstracts International}, volume = {52}, number = {5-A}, year = {1991}, pages = {1719}, keywords = {computerized adaptive testing}, author = {Kuan, Tsung Hao} } @article {593, title = {A comparison of procedures for content-sensitive item selection}, journal = {Applied Measurement in Education}, year = {1991}, author = {Kingsbury, G. G.} } @article {596, title = {A comparison of procedures for content-sensitive item selection in computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {4}, year = {1991}, pages = {241-261}, author = {Kingsbury, G. G. and A Zara} } @booklet {1329, title = {Comparisons of computer adaptive and pencil and paper tests}, year = {1991}, note = {Unpublished manuscript.}, address = {Chicago IL: American Society of Clinical Pathologists}, author = {Bergstrom, Betty A. and Lunz, M. E.} } @inbook {1788, title = {Computerized adaptive testing: Theory, applications, and standards}, year = {1991}, address = {R. K. Hambleton and J. N. Zaal (Eds.), Advances in educational and psychological testing: Theory and Applications (pp. 341-366). Boston: Kluwer.}, author = {Hambleton, R. K. and Zaal, J. N. and Pieters, J. P. M.} } @conference {848, title = {Confidence in pass/fail decisions for computer adaptive and paper and pencil examinations}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago IL}, author = {Bergstrom, B. B and Lunz, M. E.} } @booklet {1431, title = {Construction and validation of the SON-R 5-17, the Snijders-Oomen non-verbal intelligence test}, year = {1991}, address = {Groningen: Wolters-Noordhoff}, author = {Laros, J. A. and Tellegen, P. J.} } @article {585, title = {Correlates of examinee item choice behavior in self-adapted testing}, journal = {Mid-Western Educational Researcher}, volume = {4}, year = {1991}, pages = {25-28}, author = {Johnson, J. L. and Roos, L. L. and Wise, S. L. and Plake, B. S.} } @conference {903, title = {The development and evaluation of a computerized adaptive testing system}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, note = {ERIC No. ED 338 711)}, address = {Chicago IL}, author = {De la Torre, R. and Vispoel, W. P.} } @conference {990, title = {Development and evaluation of hierarchical testlets in two-stage tests using integer linear programming}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago IL}, author = {Lam, T. L. and Goong, Y. Y.} } @conference {1126, title = {An empirical comparison of self-adapted and maximum information item selection}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1991}, address = {Chicago}, author = {Rocklin, T. R. and O{\textquoteright}Donnell, A. 
M.} } @conference {926, title = {Individual differences in computer adaptive testing: Anxiety, computer literacy, and satisfaction}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education.}, year = {1991}, author = {Gershon, R. C. and Bergstrom, B.} } @article {62, title = {Inter-subtest branching in computerized adaptive testing}, journal = {Dissertation Abstracts International}, volume = {52}, number = {1-A}, year = {1991}, pages = {140-141}, keywords = {computerized adaptive testing}, author = {Chang, S-H.} } @booklet {337, title = {Patterns of alcohol and drug use among federal offenders as assessed by the Computerized Lifestyle Screening Instrument}, number = {R-11}, year = {1991}, publisher = {Research and Statistics Branch, Correctional Service of Canada}, address = {Ottawa, ON. Canada}, keywords = {computerized adaptive testing, drug abuse, substance use}, isbn = {R-11}, author = {Robinson, D. and Porporino, F. J. and Millson, W. A.} } @booklet {1483, title = {A psychometric comparison of computerized and paper-and-pencil versions of the national RN licensure examination}, year = {1991}, address = {Chicago IL: Author, Unpublished report}, author = {National-Council-of-State-Boards-of-Nursing} } @article {703, title = {On the reliability of testlet-based tests}, journal = {Journal of Educational Measurement}, volume = {28}, year = {1991}, pages = {237-247}, author = {Sireci, S. G. and Wainer, H., and Thissen, D.} } @booklet {1407, title = {A simulation study of some simple approaches to the study of DIF for CATs}, year = {1991}, address = {Internal memorandum, Educational Testing Service}, author = {Holland, P. W. and Zwick, R.} } @booklet {1554, title = {Some empirical guidelines for building testlets (Technical Report 91-56)}, year = {1991}, address = {Princeton NJ: Educational Testing Service, Program Statistics Research}, author = {Wainer, H., and Kaplan, B. and Lewis, C.} } @conference {134, title = {The use of the graded response model in computerized adaptive testing of the attitudes to science scale}, booktitle = {annual meeting of the American Education Research Association}, year = {1991}, month = {April 3-7, 1991}, address = {Chicago, IL USA}, abstract = {The graded response model for two-stage testing was applied to an attitudes toward science scale using real-data simulation. The 48-item scale was administered to 920 students at a grade-8 equivalent in Singapore. A two-stage 16-item computerized adaptive test was developed. In two-stage testing an initial, or routing, test is followed by a second-stage testlet of greater or lesser difficulty based on performance. A conventional test of the same length as the adaptive two-stage test was selected from the 48-item pool. Responses to the conventional test, the routing test, and a testlet were simulated. The algorithm of E. Balas (1965) and the multidimensional knapsack problem of optimization theory were used in test development. The simulation showed the efficiency and accuracy of the two-stage test with the graded response model in estimating attitude trait levels, as evidenced by better results from the two-stage test than its conventional counterpart and the reduction to one-third of the length of the original measure. Six tables and three graphs are included. (SLD)}, author = {Foong, Y-Y. 
and Lam, T-L.} } @article {2023, title = {The Use of Unidimensional Parameter Estimates of Multidimensional Items in Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {15}, year = {1991}, pages = {13-24}, author = {Ackerman, T. A.} } @article {434, title = {The use of unidimensional parameter estimates of multidimensional items in adaptive testing}, journal = {Applied Psychological Measurement}, volume = {15}, year = {1991}, pages = {13-24}, author = {Ackerman, T. A.} } @conference {1045, title = {What lies ahead? Computer technology and its implications for personnel testing}, booktitle = {Keynote address}, year = {1991}, address = {NATO Workshop on Computer-based Assessment of Military Personnel, Brussels, Belgium}, author = {J. R. McBride} } @article {592, title = {Adapting adaptive testing: Using the MicroCAT Testing System in a local school district}, journal = {Educational Measurement: Issues and Practice}, volume = {29 (2)}, year = {1990}, pages = {3-6}, author = {Kingsbury, G. G.} } @booklet {1556, title = {An adaptive algebra test: A testlet-based, hierarchically structured test with validity-based scoring}, year = {1990}, note = {Princeton NJ: Educational testing Service.}, address = {ETS Technical Report 90-92}, author = {Wainer, H., and Lewis, C. and Kaplan, B, and Braswell, J.} } @article {702, title = {Adaptive designs for Likert-type data: An approach for implementing marketing research}, journal = {Journal of Marketing Research}, volume = {27}, year = {1990}, pages = {304-321}, author = {Singh, J. and Howell, R. D. and Rhoads, G. K.} } @article {654, title = {Applying computerized adaptive testing in schools}, journal = {Measurement and Evaluation in Counseling and Development}, volume = {23}, year = {1990}, pages = {311-38}, author = {Olson, J. B} } @conference {975, title = {Assessing the utility of item response models: Computerized adaptive testing}, booktitle = {A paper presented to the annual meeting of the National Council of Measurement in Education}, year = {1990}, address = {Boston MA}, author = {Kingsbury, G. G. and Houser, R.L.} } @booklet {1492, title = {A comparison of Rasch and three-parameter logistic models in computerized adaptive testing}, year = {1990}, address = {Unpublished manuscript}, author = {Parker, S.B. and J. R. McBride} } @article {1380, title = {A comparison of three decision models for adapting the length of computer-based mastery tests}, journal = {Journal of Educational Computing Research}, volume = {6}, year = {1990}, pages = {479-513}, author = {Frick, T. W.} } @article {689, title = {Computer testing: Pragmatic issues and research needs}, journal = {Educational Measurement: Issues and Practice}, volume = {9 (2)}, year = {1990}, note = {Sum 1990.}, pages = {19-20}, author = {Rudner, L. M.} } @article {598, title = {Computerized adaptive measurement of attitudes}, journal = {Measurement and Evaluation in Counseling and Development}, volume = {23}, year = {1990}, pages = {20-30}, author = {Koch, W. R. and Dodd, B. G. and Fitzpatrick, S. J.} } @conference {1219, title = {Computerized adaptive music tests: A new solution to three old problems}, booktitle = {Paper presented at the biannual meeting of the Music Educators National Conference}, year = {1990}, address = {Washington DC}, author = {Vispoel, W. P.} } @book {1727, title = {Computerized adaptive testing: A primer (Eds.)}, year = {1990}, address = {Hillsdale NJ: Erlbaum}, author = {Wainer, H., and Dorans, N. J. and Flaugher, R. and Green, B. F. and Mislevy, R. 
J. and Steinberg, L. and Thissen, D.} } @article {435, title = {The construction of customized two-staged tests}, volume = {27}, year = {1990}, pages = {241-253}, author = {Adema, J. J.} } @inbook {1930, title = {Creating adaptive tests of musical ability with limited-size item pools}, year = {1990}, address = {D. Dalton (Ed.), ADCIS 32nd International Conference Proceedings (pp. 105-112). Columbus OH: Association for the Development of Computer-Based Instructional Systems.}, author = {Vispoel, W. T. and Twing, J. S.} } @conference {1272, title = {Dichotomous search strategies for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association.}, year = {1990}, author = {Xiao, B.} } @article {2021, title = {The Effect of Item Selection Procedure and Stepsize on Computerized Adaptive Attitude Measurement Using the Rating Scale Model}, journal = {Applied Psychological Measurement}, volume = {14}, year = {1990}, pages = {355-366}, author = {Dodd, B. G.} } @article {103, title = {The effect of item selection procedure and stepsize on computerized adaptive attitude measurement using the rating scale model}, journal = {Applied Psychological Measurement}, volume = {14}, number = {4}, year = {1990}, pages = {355-366}, abstract = {Real and simulated datasets were used to investigate the effects of the systematic variation of two major variables on the operating characteristics of computerized adaptive testing (CAT) applied to instruments consisting of polychotomously scored rating scale items. The two variables studied were the item selection procedure and the stepsize method used until maximum likelihood trait estimates could be calculated. The findings suggested that (1) item pools that consist of as few as 25 items may be adequate for CAT; (2) the variable stepsize method of preliminary trait estimation produced fewer cases of nonconvergence than the use of a fixed stepsize procedure; and (3) the scale value item selection procedure used in conjunction with a minimum standard error stopping rule outperformed the information item selection technique used in conjunction with a minimum information stopping rule in terms of the frequencies of nonconvergent cases, the number of items administered, and the correlations of CAT $\theta$ estimates with full scale estimates and known $\theta$ values. The implications of these findings for implementing CAT with rating scale items are discussed.}, author = {Dodd, B. G.} } @article {564, title = {The effects of variable entry on bias and information of the Bayesian adaptive testing procedure}, journal = {Educational and Psychological Measurement}, volume = {50}, year = {1990}, pages = {785-802}, author = {Hankins, J. A.} } @conference {843, title = {An empirical study of the computer adaptive MMPI-2}, booktitle = {Paper presented at the 25th Annual Symposium on recent developments in the MMPI/MMPI-2}, year = {1990}, address = {Minneapolis MN}, author = {Ben-Porath, Y. S. and Roper, B. L. and Butcher, J. N.} } @inbook {1937, title = {Future challenges}, year = {1990}, address = {H. Wainer (Ed.), Computerized adaptive testing: A primer (pp. 233-272). Hillsdale NJ: Erlbaum.}, author = {Wainer, H., and Dorans, N. J. and Green, B. F. and Mislevy, R. J. and Steinberg, L.
and Thissen, D.} } @article {37, title = {Future directions for the National Council: the Computerized Adaptive Testing Project}, journal = {Issues}, volume = {11}, number = {4}, year = {1990}, note = {911613080885-0046Journal Article}, pages = {1, 3, 5}, keywords = {*Computers, *Licensure, Educational Measurement/*methods, Societies, Nursing, United States}, author = {Bouchard, J.} } @booklet {1337, title = {Future directions for the National Council: The Computerized Adaptive Testing Project}, year = {1990}, address = {Issues, 11, 1-5(National Council of State Boards of Nursing)}, author = {Bouchard, J.} } @booklet {1579, title = {Generative adaptive testing with digit span items}, year = {1990}, address = {San Diego, CA: Testing Systems Department, Navy Personnel Research and Development Center}, author = {Wolfe, J. H. and Larson, G. E.} } @conference {1130, title = {Illustration of computerized adaptive testing with the MMPI-2}, booktitle = {Paper presented at the 98th Annual Meeting of the American Psychological Association}, year = {1990}, address = {Boston MA}, author = {Roper, B. L. and Ben-Porath, Y. S. and Butcher, J. N.} } @inbook {1934, title = {Important issues in CAT}, year = {1990}, address = {H. Wainer et al., Computerized adaptive testing: A primer. Hillsdale NJ: Erlbaum.}, author = {Wainer, H.,} } @inbook {1935, title = {Introduction and history}, year = {1990}, address = {In H. Wainer (Ed.), Computerized adaptive testing: A Primer (pp. 1 - 21). Hillsdale NJ: Erlbaum.}, author = {Wainer, H.,} } @inbook {1939, title = {Item response theory, item calibration, and proficiency estimation}, year = {1990}, address = {H. Wainer (Ed.), Computerized adaptive testing: A primer (pp. 65-102). Hillsdale NJ: Erlbaum.}, author = {Wainer, H., and Mislevy, R. J.} } @conference {1225, title = {MusicCAT: An adaptive testing program to assess musical ability}, booktitle = {Paper presented at the ADCIS 32nd International Conference}, year = {1990}, address = {San Diego CA}, author = {Vispoel, W. P. and Coffman, D. and Scriven, D.} } @article {190, title = {National Council Computerized Adaptive Testing Project Review--committee perspective}, journal = {Issues}, volume = {11}, number = {4}, year = {1990}, note = {911613110885-0046Journal Article}, pages = {3}, keywords = {*Computers, *Licensure, Educational Measurement/*methods, Feasibility Studies, Societies, Nursing, United States}, author = {Haynes, B.} } @inbook {1907, title = {Reliability and measurement precision}, year = {1990}, address = {H. Wainer, N. J. Dorans, R. Flaugher, B. F. Green, R. J. Mislevy, L. Steinberg, and D. Thissen (Eds.), Computerized adaptive testing: A primer (pp. 161-186). Hillsdale NJ: Erlbaum.}, author = {Thissen, D.} } @inbook {1961, title = {A research proposal for field testing CAT for nursing licensure examinations}, year = {1990}, address = {Delegate Assembly Book of Reports 1989. 
Chicago: National Council of State Boards of Nursing.}, author = {A Zara} } @article {731, title = {Sequential item response models with an ordered response}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {43}, year = {1990}, pages = {39-55}, author = {Tutz, G.} } @article {98, title = {A simulation and comparison of flexilevel and Bayesian computerized adaptive testing}, journal = {Journal of Educational Measurement}, volume = {27}, number = {3}, year = {1990}, pages = {227-239}, abstract = {Computerized adaptive testing (CAT) is a testing procedure that adapts an examination to an examinee{\textquoteright}s ability by administering only items of appropriate difficulty for the examinee. In this study, the authors compared Lord{\textquoteright}s flexilevel testing procedure (flexilevel CAT) with an item response theory-based CAT using Bayesian estimation of ability (Bayesian CAT). Three flexilevel CATs, which differed in test length (36, 18, and 11 items), and three Bayesian CATs were simulated; the Bayesian CATs differed from one another in the standard error of estimate (SEE) used for terminating the test (0.25, 0.10, and 0.05). Results showed that the flexilevel 36- and 18-item CATs produced ability estimates that may be considered as accurate as those of the Bayesian CAT with SEE = 0.10 and comparable to the Bayesian CAT with SEE = 0.05. The authors discuss the implications for classroom testing and for item response theory-based CAT.}, keywords = {computerized adaptive testing}, author = {De Ayala, R. J., and Dodd, B. G. and Koch, W. R.} } @article {659, title = {Software review: MicroCAT Testing System Version 3}, journal = {Journal of Educational Measurement}, volume = {7}, year = {1990}, pages = {82-88}, author = {Patience, W. M.} } @conference {847, title = {The stability of Rasch pencil and paper item calibrations on computer adaptive tests}, booktitle = {Paper presented at the Midwest Objective Measurement Seminar}, year = {1990}, address = {Chicago IL}, author = {Bergstrom, Betty A. and Lunz, M. E.} } @inbook {1909, title = {Testing algorithms}, year = {1990}, address = {H. Wainer (Ed.), Computerized adaptive testing: A primer (pp. 103-135). Hillsdale NJ: Erlbaum.}, author = {Thissen, D. and Mislevy, R. J.} } @inbook {1940, title = {Testing algorithms}, year = {1990}, address = {H. Wainer (Ed.), Computerized adaptive testing: A primer (pp. 103-135). Hillsdale NJ: Erlbaum.}, author = {Wainer, H., and Mislevy, R. J.} } @proceedings {264, title = {Test-retest consistency of computer adaptive tests.}, journal = {annual meeting of the National Council on Measurement in Education}, year = {1990}, month = {04/1990}, address = {Boston, MA USA}, author = {Lunz, M. E. and Bergstrom, Betty A. and Gershon, R. C.} } @article {780, title = {Toward a psychometrics for testlets}, journal = {Journal of Educational Measurement}, volume = {27}, year = {1990}, pages = {1-14}, author = {Wainer, H., and Lewis, C.} } @article {614, title = {Using Bayesian decision theory to design a computerized mastery test}, journal = {Applied Psychological Measurement}, volume = {14}, year = {1990}, pages = {367-386}, author = {Lewis, C., and Sheehan, K.} } @article {2022, title = {Using Bayesian Decision Theory to Design a Computerized Mastery Test}, journal = {Applied Psychological Measurement}, volume = {14}, year = {1990}, pages = {367-386}, author = {Lewis, C. 
and Sheehan, K.} } @booklet {1390, title = {Utility of predicting starting abilities in sequential computer-based adaptive tests (Research Report 90-1)}, year = {1990}, address = {Baltimore MD: Johns Hopkins University, Department of Psychology}, author = {Green, B. F. and Thomas, T. J.} } @inbook {1896, title = {Validity}, year = {1990}, address = {H. Wainer (Ed.), Computerized adaptive testing: A primer (pp. 187-231). Hillsdale NJ: Erlbaum.}, author = {Steinberg, L. and Thissen, D. and Wainer, H.,} } @booklet {1592, title = {Validity study in multidimensional latent space and efficient computerized adaptive testing (Final Report R01-1069-11-004-91)}, year = {1990}, address = {Knoxville TN: University of Tennessee, Department of Psychology}, author = {Samejima, F.} } @conference {995, title = {What can we do with computerized adaptive testing and what we cannot do?}, booktitle = {Paper presented at the annual meeting of the Regional Language Center Seminar}, year = {1990}, note = {ERIC No. ED 322 7829}, author = {Laurier, M.} } @article {569, title = {Adaptive and conventional versions of the DAT: The first complete test battery comparison}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {363-371}, author = {Henly, S. J. and Klebe, K. J. and J. R. McBride and Cudeck, R.} } @article {2019, title = {Adaptive and Conventional Versions of the DAT: The First Complete Test Battery Comparison}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {363-371}, author = {Henly, S. J. and Klebe, K. J. and J. R. McBride and Cudeck, R.} } @article {547, title = {Adaptive estimation when the unidimensionality assumption of IRT is violated}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {373-389}, author = {Folk, V.G., and Green, B. F.} } @article {2020, title = {Adaptive Estimation When the Unidimensionality Assumption of IRT is Violated}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {373-389}, author = {Folk, V.G. and Green, B. F.} } @article {325, title = {Adaptive testing: The evolution of a good idea}, journal = {Educational Measurement: Issues and Practice}, volume = {8}, number = {3}, year = {1989}, pages = {11-15}, keywords = {computerized adaptive testing}, isbn = {1745-3992}, author = {Reckase, M. D.} } @article {209, title = {Application of computerized adaptive testing to the University Entrance Exam of Taiwan, R.O.C}, journal = {Dissertation Abstracts International}, volume = {49}, number = {12-A, Pt 1}, year = {1989}, pages = {3662}, keywords = {computerized adaptive testing}, author = {Hung, P-H.} } @mastersthesis {350, title = {An applied study on computerized adaptive testing}, year = {1989}, pages = {185}, school = {University of Groningen}, type = {Dissertation}, address = {Groningen, The Netherlands}, abstract = {(from the cover) The rapid development and falling prices of powerful personal computers, in combination with new test theories, will have a large impact on psychological testing. One of the new possibilities is computerized adaptive testing. During the test administration each item is chosen to be appropriate for the person being tested. The test becomes tailor-made, resolving some of the problems with classical paper-and-pencil tests. In this way individual differences can be measured with higher efficiency and reliability. Scores on other meaningful variables, such as response time, can be obtained easily using computers.
/// In this book a study on computerized adaptive testing is described. The study took place at Dutch Railways in an applied setting and served practical goals. Topics discussed include the construction of computerized tests, the use of response time, the choice of algorithms and the implications of using a latent trait model. After running a number of simulations and calibrating the item banks, an experiment was carried out. In the experiment a pretest was administered to a sample of over 300 applicants, followed by an adaptive test. In addition, a survey concerning the attitudes of testees towards computerized testing formed part of the design.}, keywords = {computerized adaptive testing}, author = {Schoonman, W.} } @book {1698, title = {An applied study on computerized adaptive testing}, year = {1989}, address = {Amsterdam, The Netherlands: Swets and Zeitlinger}, author = {Schoonman, W.} } @conference {974, title = {Assessing the impact of using item parameter estimates obtained from paper-and-pencil testing for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1989}, note = {$\#$KI89-01}, address = {San Francisco}, author = {Kingsbury, G. G. and Houser, R.L.} } @article {549, title = {Bayesian adaptation during computer-based tests and computer-guided practice exercises}, journal = {Journal of Educational Computing Research}, volume = {5(1)}, year = {1989}, pages = {89-114}, author = {Frick, T. W.} } @book {1669, title = {CAT administrator [Computer program]}, year = {1989}, address = {Chicago: Micro Connections}, author = {Gershon, R. C.} } @conference {1048, title = {Commercial applications of computerized adaptive testing}, booktitle = {C.E. Davis Chair, Computerized Adaptive Testing{\textendash}Military and Commercial Developments Ten Years Later: Symposium conducted at the Annual Conference of the Military Testing Association (524-529)}, year = {1989}, address = {San~Antonio, TX}, author = {J. R. McBride} } @booklet {1381, title = {A comparison of an expert systems approach to computerized adaptive testing and an IRT model}, year = {1989}, address = {Unpublished manuscript (submitted to American Educational Research Journal)}, author = {Frick, T. W.} } @article {524, title = {A comparison of the nominal response model and the three-parameter logistic model in computerized adaptive testing}, journal = {Educational and Psychological Measurement}, volume = {49}, year = {1989}, pages = {789-805}, author = {De Ayala, R. J.,} } @conference {948, title = {A comparison of three adaptive testing strategies using MicroCAT}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1989}, note = {$\#$HO89-01 Tables and figures only.)}, address = {San Francisco}, author = {Ho, R. and Hsu, T. C.} } @article {301, title = {Comparisons of paper-administered, computer-administered and computerized adaptive achievement tests}, journal = {Journal of Educational Computing Research}, volume = {5}, number = {3}, year = {1989}, pages = {311-326}, abstract = {This research study was designed to compare student achievement scores from three different testing methods: paper-administered testing, computer-administered testing, and computerized adaptive testing. The three testing formats were developed from the California Assessment Program (CAP) item banks for grades three and six. 
The paper-administered and the computer-administered tests were identical in item content, format, and sequence. The computerized adaptive test was a tailored or adaptive sequence of the items in the computer-administered test. }, author = {Olson, J. B. and Maynes, D. D. and Slawson, D. and Ho, K.} } @conference {1047, title = {A computerized adaptive mathematics screening test}, booktitle = {Paper presented at the Annual Meeting of the California Educational Research Association}, year = {1989}, note = {(ERIC Document Reproduction Service No. ED 316 554)}, address = {Burlingame, CA}, author = {J. R. McBride} } @book {1714, title = {Computerized adaptive personality assessment}, year = {1989}, address = {Unpublished master{\textquoteright}s thesis, Harvard University, Cambridge MA}, author = {Waller, N. G.} } @booklet {1391, title = {Computerized adaptive tests}, year = {1989}, address = {ERIC Clearinghouse on Tests, Measurement, and Evaluation, no. 107}, author = {Grist, S. and Rudner, L. M. and Wise} } @booklet {1577, title = {A consideration for variable length adaptive tests (Research Report 89-40)}, year = {1989}, address = {Princeton NJ: Educational Testing Service}, author = {Wingersky, M. S.} } @inbook {1816, title = {Die Optimierung der Me{\ss}genauigkeit beim branched adaptiven Testen [Optimization of measurement precision for branched-adaptive testing]}, year = {1989}, address = {K. D. Kubinger (Ed.), Moderne Testtheorie: Ein Abri{\ss} samt neuesten Beitr{\"a}gen [Modern test theory: Overview and new issues] (pp. 187-218). Weinheim, Germany: Beltz.}, author = {Kubinger, K. D.} } @article {2018, title = {Estimating Reliabilities of Computerized Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {145-149}, author = {Divgi, D. R.} } @book {1640, title = {{\'E}tude de praticabilit{\'e} du testing adaptatif de ma{\^\i}trise des apprentissages scolaires au Qu{\'e}bec : une exp{\'e}rimentation en {\'e}ducation {\'e}conomique secondaire 5}, year = {1989}, note = {[In French]}, address = {Th{\`e}se de doctorat non publi{\'e}e. Montr{\'e}al : Universit{\'e} du Qu{\'e}bec {\`a} Montr{\'e}al. [In French]}, author = {Auger, R.} } @conference {551, title = {EXSPRT: An expert systems approach to computer-based adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, address = {San Francisco}, year = {1989}, author = {Frick, T. W. and Plew, G.T. and Luk, H.-K.} } @conference {1271, title = {Golden section search strategies for computerized adaptive testing}, booktitle = {Paper presented at the Fifth International Objective Measurement Workshop}, year = {1989}, note = {$\#$XI89-01}, address = {Berkeley CA}, author = {Xiao, B.} } @conference {1125, title = {Individual differences in item selection in self adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1989}, address = {San Francisco CA}, author = {Rocklin, T.} } @booklet {1503, title = {The interpretation and application of multidimensional item response theory models; and computerized testing in the instructional environment: Final Report (Research Report ONR 89-2)}, year = {1989}, note = {$\#$RE89-02}, address = {Iowa City IA: The American College Testing Program}, author = {Reckase, M.
D.} } @conference {859, title = {Investigating the validity of a computerized adaptive test for different examinee groups}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1989}, address = {San Francisco CA}, author = {Buhr, D. C. and Legg, S. M.} } @article {228, title = {An investigation of procedures for computerized adaptive testing using partial credit scoring}, journal = {Applied Measurement in Education}, volume = {2}, number = {4}, year = {1989}, pages = {335-357}, author = {Koch, W. R. and Dodd, B. G.} } @booklet {1515, title = {Item-presentation controls for computerized adaptive testing: Content-balancing versus min-CAT (Research Report 89-1)}, year = {1989}, address = {Baltimore MD: Johns Hopkins University, Department of Psychology, Psychometric Laboratory}, author = {Thomas, T. J. and Green, B. F.} } @article {2017, title = {Operational Characteristics of Adaptive Testing Procedures Using the Graded Response Model}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {129-143}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.} } @article {532, title = {Operational characteristics of adaptive testing procedures using the graded response model}, journal = {Applied Psychological Measurement}, volume = {13}, year = {1989}, pages = {129-143}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @article {595, title = {Procedures for selecting items for computerized adaptive tests}, journal = {Applied Measurement in Education}, volume = {2}, year = {1989}, pages = {359-375}, author = {Kingsbury, G. G. and A Zara} } @article {809, title = {Providing item feedback in computer-based tests: Effects of initial success and failure}, journal = {Educational and Psychological Measurement}, volume = {49}, year = {1989}, pages = {479-486}, author = {Wise, S. L. and Plake, B. S. and et. al.} } @article {22, title = {A real-data simulation of computerized adaptive administration of the MMPI}, journal = {Psychological Assessment}, volume = {1}, number = {1}, year = {1989}, note = {Article}, pages = {18-22}, abstract = {A real-data simulation of computerized adaptive administration of the MMPI was conducted with data obtained from two personnel-selection samples and two clinical samples. A modification of the countdown method was tested to determine the usefulness, in terms of item administration savings, of several different test administration procedures. Substantial item administration savings were achieved for all four samples, though the clinical samples required administration of more items to achieve accurate classification and/or full-scale scores than did the personnel-selection samples. The use of normative item endorsement frequencies was found to be as effective as sample-specific frequencies for the determination of item administration order. The role of computerized adaptive testing in the future of personality assessment is discussed., (C) 1989 by the American Psychological Association}, keywords = {computerized adaptive testing}, author = {Ben-Porath, Y. S. and Slutske, W. S. and Butcher, J. N.} } @inbook {1933, title = {A research proposal for field testing CAT for nursing licensure examinations}, year = {1989}, address = {Delegate Assembly Book of Reports 1989. 
Chicago: National Council of State Boards of Nursing, Inc.}, author = {A Zara} } @article {749, title = {Some procedures for computerized ability testing}, journal = {International Journal of Educational Research}, volume = {13(2)}, year = {1989}, pages = {175-187}, author = {van der Linden, W. J. and Zwarts, M. A.} } @article {588, title = {Tailored interviewing: An application of item response theory for personality measurement}, journal = {Journal of Personality Assessment}, volume = {53}, year = {1989}, pages = {502-519}, author = {Kamakura, W. A., and Balasubramanian, S. K.} } @article {716, title = {Testing software review: MicroCAT Version 3}, journal = {. Educational Measurement: Issues and Practice}, volume = {8 (3)}, year = {1989}, pages = {33-38}, author = {Stone, C. A.} } @article {723, title = {Trace lines for testlets: A use of multiple-categorical-response models}, journal = {Journal of Educational Measurement}, volume = {26}, year = {1989}, pages = {247-260}, author = {Thissen, D. and Steinberg, L. and Mooney, J.A.} } @book {1652, title = {Application of appropriateness measurement to a problem in computerized adaptive testing}, year = {1988}, address = {Unpublished doctoral dissertation, University of Illinois}, author = {Candell, G. L.} } @article {796, title = {Assessment of academic skills of learning disabled students with classroom microcomputers}, journal = {School Psychology Review}, volume = {17}, year = {1988}, pages = {81-88}, author = {Watkins, M. W. and Kush, J. C.} } @article {789, title = {The College Board computerized placement tests: An application of computerized adaptive testing}, journal = {Machine-Mediated Learning}, volume = {2}, year = {1988}, pages = {271-282}, author = {W. C. Ward} } @conference {973, title = {A comparison of achievement level estimates from computerized adaptive testing and paper-and-pencil testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, note = {{PDF file, 43 KB}}, address = {New Orleans LA}, author = {Kingsbury, G. G. and Houser, R.L.} } @conference {841, title = {A comparison of two methods for the adaptive administration of the MMPI-2 content scales}, booktitle = {Paper presented at the 86th Annual Convention of the American Psychological Association}, year = {1988}, address = {Atlanta GA}, author = {Ben-Porath, Y. S. and Waller, N. G. and Slutske, W. S. and Butcher, J. N.} } @conference {909, title = {Computerized adaptive attitude measurement: A comparison of the graded response and rating scale models}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, address = {New Orleans}, author = {Dodd, B. G. and Koch, W. R. and De Ayala, R. J.,} } @article {97, title = {Computerized adaptive testing: A comparison of the nominal response model and the three parameter model}, journal = {Dissertation Abstracts International}, volume = {48}, number = {10-B}, year = {1988}, pages = {3148}, keywords = {computerized adaptive testing}, author = {De Ayala, R. J.,} } @article {591, title = {Computerized adaptive testing: A four-year-old pilot study shows that CAT can work}, journal = {Technological Horizons in Education}, volume = {16 (4)}, year = {1988}, pages = {73-76}, author = {Kingsbury, G. G. and et. 
al.} } @conference {1109, title = {Computerized adaptive testing: a good idea waiting for the right technology}, booktitle = {Paper presented at the meeting of the American Educational Research Association}, year = {1988}, address = {New Orleans, April 1988}, author = {Reckase, M. D.} } @conference {1142, title = {Computerized adaptive testing program at Miami-Dade Community College, South Campus}, booktitle = {Laguna Hills CA: League for Innovation in the Community College.}, year = {1988}, author = {Schinoff, R. B. and Stead, L.} } @booklet {1435, title = {Computerized adaptive testing: The state of the art in assessment at three community colleges}, year = {1988}, note = {(25431 Cabot Road, Suite 203, Laguna Hills CA 92653)}, address = {Laguna Hills CA: Author}, author = {League-for-Innovation-in-the-Community-College} } @book {1666, title = {Computerized adaptive testing: The state of the art in assessment at three community colleges}, year = {1988}, address = {Laguna Hills CA: League for Innovation in the Community College}, author = {Doucette, D.} } @conference {1046, title = {A computerized adaptive version of the Differential Aptitude Tests}, booktitle = {Paper presented at the meeting of the American Psychological Association}, year = {1988}, address = {Atlanta GA}, author = {J. R. McBride} } @article {615, title = {Computerized mastery testing}, journal = {Machine-Mediated Learning}, volume = {2}, year = {1988}, pages = {283-286}, author = {Lewis, C. and Sheehan, K.} } @inbook {1782, title = {Construct validity of computer-based tests}, year = {1988}, address = {H. Wainer and H. Braun (Eds.), Test validity (pp. 77-103). Hillsdale NJ: Erlbaum.}, author = {Green, B. F.} } @article {560, title = {Critical problems in computer-based psychological measurement}, journal = {Applied Measurement in Education}, volume = {1}, year = {1988}, pages = {223-231}, author = {Green, B. F.} } @conference {953, title = {The development and evaluation of a microcomputerized adaptive placement testing system for college mathematics}, booktitle = {Paper(s) presented at the annual meeting(s) of the American Educational Research Association}, year = {1988}, address = {1986 (San Francisco CA) and 1987 (Washington DC)}, author = {Hsu, T.-C. and Shermis, M. D.} } @booklet {1458, title = {The equivalence of scores from automated and conventional educational and psychological tests (College Board Report No. 88-8)}, year = {1988}, address = {New York: The College Entrance Examination Board.}, author = {Mazzeo, J. and Harvey, A. L.} } @conference {1117, title = {Fitting the two-parameter model to personality data: The parameterization of the Multidimensional Personality Questionnaire}, booktitle = {Unpublished manuscript.}, year = {1988}, author = {Reise, S. P. and Waller, N. G.} } @booklet {1343, title = {The four generations of computerized educational measurement (Research Report 98-35)}, year = {1988}, address = {Princeton NJ: Educational Testing Service.}, author = {Bunderson, C. V and Inouye, D. K and Olsen, J. B.} } @article {821, title = {Introduction to item response theory and computerized adaptive testing as applied in licensure and certification testing}, journal = {National Clearinghouse of Examination Information Newsletter}, volume = {6}, year = {1988}, pages = {11-17}, author = {A Zara} } @article {486, title = {Item pool maintenance in the presence of item parameter drift}, journal = {Journal of Educational Measurement}, volume = {25}, year = {1988}, pages = {275-285}, author = {Bock, R.
D., and Muraki, E. and Pfeiffenberger, W.} } @conference {980, title = {A predictive analysis approach to adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, note = {ERIC No. ED295982).}, address = {New Orleans LA}, author = {Kirisci, L. and Hsu, T.-C.} } @booklet {1601, title = {A procedure for scoring incomplete adaptive tests in high stakes testing}, year = {1988}, address = {Unpublished manuscript. San Diego, CA: Navy Personnel Research and Development Center}, author = {Segall, D. O.} } @conference {99, title = {The Rasch model and missing data, with an emphasis on tailoring test items}, booktitle = {annual meeting of the American Educational Research Association}, year = {1988}, month = {April 5-9}, address = {New Orleans, LA. USA}, abstract = {Many applications of educational testing have a missing data aspect (MDA). This MDA is perhaps most pronounced in item banking, where each examinee responds to a different subtest of items from a large item pool and where both person and item parameter estimates are needed. The Rasch model is emphasized, and its non-parametric counterpart (the Mokken scale) is considered. The possibility of tailoring test items in combination with their estimation is discussed; however, most methods for the estimation of item parameters are inadequate under tailoring. Without special measures, only marginal maximum likelihood produces adequate item parameter estimates under item tailoring. Fischer{\textquoteright}s approximate minimum-chi-square method for estimation of item parameters for the Rasch model is discussed, which efficiently produces item parameters. (TJH)}, author = {de Gruijter, D. N. M.} } @article {557, title = {The Rasch model and multi-stage testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {13}, year = {1988}, pages = {45-52}, author = {Glas, C. A. W.} } @inbook {1815, title = {On a Rasch-model-based test for non-computerized adaptive testing}, year = {1988}, address = {Langeheine, R. and Rost, J. (Ed.), Latent trait and latent class models. New York: Plenum Press.}, author = {Kubinger, K. D.} } @conference {1164, title = {A real-data simulation of adaptive MMPI administration}, booktitle = {Paper presented at the 23rd Annual Symposium on recent developments in the use of the MMPI}, year = {1988}, address = {St. Petersburg FL}, author = {Slutske, W. S. and Ben-Porath, Y. S. and Butcher, J. N.} } @booklet {1578, title = {Refinement of the Computerized Adaptive Screening Test (CAST) (Final Report, Contract No MDA203 06-C-0373)}, year = {1988}, address = {Washington, DC: American Institutes for Research.}, author = {Wise, L. L. and McHenry, J.J. and Chia, W.J. and Szenas, P.L. and J. R. McBride} } @booklet {1611, title = {Scale drift in on-line calibration (Research Report RR-88-28-ONR)}, number = {(Research Report RR-88-28-ONR)}, year = {1988}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @booklet {1617, title = {Scale drift in on-line calibration (Tech Rep. No. ERIC ED389710)}, year = {1988}, address = {Educational Testing Service, Princeton, N.J.}, author = {Stocking, M. L.} } @conference {1018, title = {Simple and effective algorithms [for] computer-adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1988}, note = {$\#$LI88-01}, address = {New Orleans LA}, author = {Linacre, J. 
M.} } @booklet {1612, title = {Some considerations in maintaining adaptive test item pools (Research Report 88-33-ONR)}, year = {1988}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @booklet {1618, title = {Some considerations in maintaining adaptive test item pools (Tech Rep. No. ERIC ED391814)}, year = {1988}, address = {Educational Testing Service, Princeton, N.J.}, author = {Stocking, M. L.} } @book {1643, title = {Users manual for the MicroCAT Testing System, Version 3}, year = {1988}, address = {St. Paul MN: Author.}, author = {Assessment-Systems-Corporation} } @book {1708, title = {An adaptive test of musical memory: An application of item response theory to the assessment of musical ability}, year = {1987}, address = {Doctoral dissertation, University of Illinois. Dissertation Abstracts International, 49, 79A.}, author = {Vispoel, W. P.} } @article {802, title = {Adaptive testing}, journal = {Applied Psychology: An International Review}, volume = {36}, year = {1987}, pages = {249-262}, author = {Weiss, D. J. and Vale, C. D.} } @booklet {1307, title = {Adaptive testing, information, and the partial credit model}, year = {1987}, address = {Melbourne, Australia: University of Melbourne, Center for the Study of Higher Education}, author = {Adams, R. J.} } @article {779, title = {CATS, testlets, and test construction: A rationale for putting test developers back into CAT}, journal = {Journal of Educational Measurement}, volume = {32}, year = {1987}, note = {(volume number appears to incorrect)}, pages = {185-202}, author = {Wainer, H., and Kiely, G. L.} } @booklet {1441, title = {A computer program for adaptive testing by microcomputer (MESA Memorandum No 40)}, year = {1987}, address = {Chicago: University of Chicago. (ERIC ED 280 895.)}, author = {Linacre, J. M.} } @booklet {1432, title = {Computerized adaptive language testing: A Spanish placement exam}, year = {1987}, note = {(ERIC No. FL016939)}, address = {In Language Testing Research Selected Papers from the Colloquium, Monterey CA}, author = {Larson, J. W.} } @conference {899, title = {Computerized adaptive testing: A comparison of the nominal response model and the three-parameter logistic model}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1987}, address = {Washington DC}, author = {De Ayala, R. J., and Koch, W. R.} } @inbook {1952, title = {Computerized adaptive testing for measuring abilities and other psychological variables}, year = {1987}, address = {J. N. Butcher (Ed.), Computerized personality measurement: A practitioners guide (pp. 325-343). New York: Basic Books.}, author = {Weiss, D. J. and Vale, C. D.} } @conference {1044, title = {Computerized adaptive testing made practical: The Computerized Adaptive Edition of the Differential Aptitude Tests}, booktitle = {Presented at the U.S. Department of Labor National Test Development Conference}, year = {1987}, address = {San Francisco, CA}, author = {J. R. McBride} } @conference {907, title = {Computerized adaptive testing with the rating scale model}, booktitle = {Paper presented at the Fourth International Objective Measurement Workshop}, year = {1987}, address = {Chicago}, author = {Dodd, B. G.} } @article {490, title = {Computerized psychological testing: Overview and critique}, journal = {Professional Psychology: Research and Practice}, volume = {1}, year = {1987}, pages = {42-51}, author = {Burke, M. J, and Normand, J and Raju, N. 
M.} } @article {366, title = {The effect of item parameter estimation error on decisions made using the sequential probability ratio test}, number = {Research Report 87-1}, year = {1987}, institution = {DTIC Document}, address = {Iowa City, IA. USA}, keywords = {computerized adaptive testing, Sequential probability ratio test}, author = {Spray, J. A. and Reckase, M. D.} } @booklet {1607, title = {The effect of item parameter estimation error on the decisions made using the sequential probability ratio test (ACT Research Report Series 87-17)}, year = {1987}, address = {Iowa City IA: American College Testing}, author = {Spray, J. A. and Reckase, M. D.} } @book {1673, title = {The effects of variable entry on bias and information of the Bayesian adaptive testing procedure}, year = {1987}, address = {Dissertation Abstracts International, 47 (8A), 3013}, author = {Hankins, J. A.} } @conference {1055, title = {Equating the computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Paper presented at the meeting of the American Psychological Association}, year = {1987}, address = {New York}, author = {J. R. McBride and Corpe, V. A. and Wing, H.} } @booklet {1629, title = {Equivalent-groups versus single-group equating designs for the Accelerated CAT-ASVAB Project (Research Memorandum 87-6)}, year = {1987}, address = {Alexandria VA: Center for Naval Analyses}, author = {Stoloff, P. H.} } @booklet {1437, title = {Final report: Feasibility study of a computerized test administration of the CLAST}, year = {1987}, address = {University of Florida: Institute for Student Assessment and Evaluation}, author = {Legg, S. M. and Buhr, D. C.} } @booklet {1589, title = {Full-information item factor analysis from the ASVAB CAT item pool (Methodology Research Center Report 87-1)}, year = {1987}, address = {Chicago IL: University of Chicago}, author = {Zimowski, M. F. and Bock, R. D.} } @booklet {1586, title = {Functional and design specifications for the National Council of State Boards of Nursing adaptive testing system}, year = {1987}, address = {Unpublished manuscript}, author = {A Zara and Bosma, J. and Kaplan, R.} } @inbook {1928, title = {Improving the measurement of musical ability through adaptive testing}, year = {1987}, address = {G. Hayes (Ed.), Proceedings of the 29th International ADCIS Conference (pp. 221-228). Bellingham WA: ADCIS.}, author = {Vispoel, W. P.} } @article {2134, title = {Item clusters and computerized adaptive testing: A case for testlets}, journal = {Journal of Educational Measurement}, volume = {24}, year = {1987}, pages = {185-201}, author = {Wainer, H., and Kiely, G. L.} } @conference {862, title = {Multidimensional adaptive testing: A procedure for sequential estimation of the posterior centroid and dispersion of theta}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1987}, address = {Montreal, Canada}, author = {Bloxom, B. M. and Vale, C. D.} } @booklet {1363, title = {Properties of some Bayesian scoring procedures for computerized adaptive tests (Research Memorandum CRM 87-161)}, year = {1987}, address = {Alexandria VA: Center for Naval Analyses}, author = {Divgi, D. R.} } @article {684, title = {Self-adapted testing: A performance improving variation of computerized adaptive testing}, journal = {Journal of Educational Psychology}, volume = {79}, year = {1987}, pages = {315-319}, author = {Rocklin, T. R., and O{\textquoteright}Donnell, A. 
M.} } @article {709, title = {Two simulated feasibility studies in computerized adaptive testing}, journal = {Applied Psychology: An International Review}, volume = {36}, year = {1987}, pages = {263-277}, author = {Stocking, M. L.} } @article {3, title = {The use of unidimensional item parameter estimates of multidimensional items in adaptive testing}, number = {87-13}, year = {1987}, month = {September, 1987}, pages = {33}, institution = {ACT}, address = {Iowa City, IA}, abstract = {Investigated the effect of using multidimensional (MDN) items in a computer adaptive test setting that assumes a unidimensional item response theory model in 2 experiments, using generated and real data in which difficulty was known to be confounded with dimensionality. Results from simulations suggest that univariate calibration of MDN data filtered out multidimensionality. The closer an item{\textquoteright}s MDN composite aligned itself with the calibrated univariate ability scale{\textquoteright}s orientation, the larger was the estimated discrimination parameter. (PsycINFO Database Record (c) 2003 APA, all rights reserved).}, isbn = {87-13}, author = {Ackerman, T. A.} } @article {530, title = {Wilcox{\textquoteright} closed sequential testing procedure in stratified item domains}, journal = {Methodika}, volume = {1(1)}, year = {1987}, pages = {3-12}, author = {de Gruijter, D. N.} } @article {148, title = {An application of computer adaptive testing with communication handicapped examinees}, journal = {Educational and Psychological Measurement}, volume = {46}, number = {1}, year = {1986}, note = {doi: 10.1177/0013164486461003}, pages = {23-35}, abstract = {This study was conducted to evaluate a computerized adaptive testing procedure for the measurement of mathematical skills of entry level deaf college students. The theoretical basis of the study was the Rasch model for person measurement. Sixty persons were tested using an Apple II Plus microcomputer. Ability estimates provided by the computerized procedure were compared for stability with those obtained six to eight weeks earlier from conventional (written) testing of the same subject matter. Students{\textquoteright} attitudes toward their testing experiences also were measured. Substantial increases in measurement efficiency (by reducing test length) were realized through the adaptive testing procedure. Because the item pool used was not specifically designed for adaptive testing purposes, the psychometric quality of measurements resulting from the different testing methods was approximately equal. Attitudes toward computerized testing were favorable.}, keywords = {computerized adaptive testing}, isbn = {0013-1644}, author = {Garrison, W. M. and Baumgarten, B. S.} } @booklet {1555, title = {CATs, testlets, and test construction: A rationale for putting test developers back into CAT (Technical Report 86-71)}, year = {1986}, note = {$\#$WA86-71}, address = {Princeton NJ: Educational Testing Service, Program Statistics Research}, author = {Wainer, H., and Kiely, G. L.} } @inbook {1906, title = {A cognitive error diagnostic adaptive testing system}, year = {1986}, address = {The 28th ADCIS International Conference Proceedings. Washington DC: ADCIS.}, author = {Tatsuoka, K. K.} } @booklet {1561, title = {College Board computerized placement tests: Validation of an adaptive test of basic skills (Research Report 86-29)}, year = {1986}, address = {Princeton NJ: Educational Testing Service.}, author = {W. C.
Ward and Kline, R. G. and Flaugher, J.} } @conference {1077, title = {Comparison and equating of paper-administered, computer-administered, and computerized adaptive tests of achievement}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1986}, address = {San Francisco CA}, author = {Olsen, J. B. and Maynes, D. D. and Slawson, D. and Ho, K} } @conference {1155, title = {A computer-adaptive placement test for college mathematics}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1986}, note = {$\#$SH86-01}, address = {San Francisco CA}, author = {Shermis, M. D.} } @conference {1056, title = {Computerized adaptive achievement testing: A prototype}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1986}, address = {San Francisco CA}, author = {J. R. McBride and Moe, K. C.} } @conference {1042, title = {A computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Paper presented at the meeting of the American Psychological Association}, year = {1986}, note = {ERIC No. ED 285 918)}, address = {Washington DC}, author = {J. R. McBride} } @conference {1043, title = {A computerized adaptive edition of the Differential Aptitude Tests}, booktitle = {Presented at the National Assessment Conference of the Education Commission of the States}, year = {1986}, address = {Boulder, CO}, author = {J. R. McBride} } @inbook {1807, title = {Computerized adaptive testing: A pilot project}, year = {1986}, address = {W. C. Ryan (ed.), Proceedings: NECC 86, National Educational Computing Conference (pp.172-176). Eugene OR: University of Oregon, International Council on Computers in Education.}, author = {Kingsbury, G. G.} } @article {814, title = {Computerized testing technology}, journal = {Advances in Reading/Language Research}, volume = {4}, year = {1986}, pages = {71-78}, author = {Wolfe, J. H.} } @booklet {1362, title = {Determining the sensitivity of CAT-ASVAB scores to changes in item response curves with the medium of administration (Report No.86-189)}, year = {1986}, note = {$\#$DI86-189}, address = {Alexandria VA: Center for Naval Analyses}, author = {Divgi, D. R.} } @article {609, title = {The effects of computer experience on computerized adaptive test performance}, journal = {Educational and Psychological Measurement}, volume = {46}, year = {1986}, pages = {727-733}, author = {Lee, J. A.} } @article {559, title = {Equivalence of conventional and computer presentation of speed tests}, journal = {Applied Psychological Measurement}, volume = {10}, year = {1986}, pages = {23-34}, author = {Greaud, V. A., and Green, B. F.} } @booklet {1321, title = {Final report: Adaptive testing of spatial abilities (ONR 150 531)}, year = {1986}, address = {Princeton, NJ: Educational Testing Service}, author = {Bejar, I. I.} } @booklet {1502, title = {Final report: The use of tailored testing with instructional programs (Research Report ONR 86-1)}, year = {1986}, address = {Iowa City IA: The American College Testing Program, Assessment Programs Area, Test Development Division.}, author = {Reckase, M. D.} } @inbook {1740, title = {The four generations of computerized educational measurement}, year = {1986}, address = {In R. L. Linn (Ed.), Educational Measurement (3rd ed and pp. 367-407). New York: Macmillan.}, author = {Bunderson, C. V and Inouye, D. K and Olsen, J. 
B.} } @conference {1141, title = {Measuring up in an individualized way with CAT-ASVAB: Considerations in the development of adaptive testing pools}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1986}, note = {(ERIC No. ED 269 463)}, address = {San Francisco CA}, author = {Schartz, M.} } @conference {983, title = {Operational characteristics of adaptive testing procedures using partial credit scoring}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1986}, note = {$\#$KO86-01}, address = {San Francisco CA}, author = {Koch, W. R. and Dodd, B. G.} } @article {722, title = {Some applications of optimization algorithms in test design and adaptive testing}, journal = {Applied Psychological Measurement}, volume = {10}, year = {1986}, pages = {381-389}, author = {Theunissen, T. J. J. M.} } @article {2016, title = {Some Applications of Optimization Algorithms in Test Design and Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {10}, year = {1986}, pages = {381-389}, author = {Theunissen, T. J. J. M.} } @article {725, title = {Using microcomputer-based assessment in career counseling}, journal = {Journal of Employment Counseling}, volume = {23}, year = {1986}, pages = {50-56}, author = {Thompson, D. L.} } @article {222, title = {Adaptive self-referenced testing as a procedure for the measurement of individual change due to instruction: A comparison of the reliabilities of change estimates obtained from conventional and adaptive testing procedures}, journal = {Dissertation Abstracts International}, volume = {45}, number = {9-B}, year = {1985}, pages = {3057}, keywords = {computerized adaptive testing}, author = {Kingsbury, G. G.} } @article {800, title = {Adaptive testing by computer}, journal = {Journal of Consulting and Clinical Psychology}, volume = {53}, year = {1985}, pages = {774-789}, author = {Weiss, D. J.} } @article {528, title = {ALPHATAB: A lookup table for Bayesian computerized adaptive testing}, journal = {Applied Psychological Measurement}, volume = {9}, year = {1985}, pages = {326}, author = {De Ayala, R. J., and Koch, W. R.} } @booklet {1497, title = {Armed Services Vocational Aptitude Battery: Development of an adaptive item pool (AFHRL-TR-85-19; Technical Rep No 85-19)}, year = {1985}, address = {Brooks Air Force Base TX: Air Force Human Resources Laboratory}, author = {Prestwood, J. S. and Vale, C. D. and Massey, R. H. and Welsh, J. R.} } @conference {982, title = {Computerized adaptive attitude measurement}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1985}, address = {Chicago}, author = {Koch, W. R. and Dodd, B. G.} } @article {636, title = {Computerized adaptive testing}, journal = {Educational Leadership}, volume = {43}, year = {1985}, pages = {25-28}, author = {J. R. McBride} } @conference {1050, title = {Computerized adaptive testing: An overview and an example}, booktitle = {Presented at the Assessment Conference of the Education Commission of the States}, year = {1985}, address = {Boulder, CO}, author = {J. R. McBride} } @article {2118, title = {Controlling item exposure conditional on ability in computerized adaptive testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {23}, year = {1985}, pages = {57-75}, author = {Sympson, J. B. and Hetter, R.
D.} } @inbook {1904, title = {Controlling item-exposure rates in computerized adaptive testing}, year = {1985}, address = {Proceedings of the 27th annual meeting of the Military Testing Association (pp. 973-977). San Diego CA: Navy Personnel Research and Development Center.}, author = {Sympson, J. B. and Hetter, R. D.} } @article {43, title = {Current developments and future directions in computerized personality assessment}, journal = {Journal of Consulting and Clinical Psychology}, volume = {53}, number = {6}, year = {1985}, note = {Miscellaneous Article}, pages = {803-815}, abstract = {Although computer applications in personality assessment have burgeoned rapidly in recent years, the majority of these uses capitalize on the computer{\textquoteright}s speed, accuracy, and memory capacity rather than its potential for the development of new, flexible assessment strategies. A review of current examples of computer usage in personality assessment reveals wide acceptance of automated clerical tasks such as test scoring and even test administration. The computer is also assuming tasks previously reserved for expert clinicians, such as writing narrative interpretive reports from test results. All of these functions represent automation of established assessment devices and interpretive strategies. The possibility also exists of harnessing some of the computer{\textquoteright}s unique adaptive capabilities to alter standard devices and even develop new ones. Three proposed strategies for developing computerized adaptive personality tests are described, with the conclusion that the computer{\textquoteright}s potential in this area justifies a call for further research efforts., (C) 1985 by the American Psychological Association}, author = {Butcher, J. N. and Keller, L. S. and Bacon, S. F.} } @booklet {1525, title = {Development of a microcomputer-based adaptive testing system: Phase II Implementation (Research Report ONR 85-5)}, year = {1985}, address = {St. Paul MN: Assessment Systems Corporation}, author = {Vale, C. D.} } @article {374, title = {Equivalence of scores from computerized adaptive and paper-and-pencil ASVAB tests}, number = {CNR 113}, year = {1985}, pages = {100}, institution = {Center for Naval Analysis}, address = {Alexandria, VA. USA}, author = {Stoloff, P. H.} } @booklet {1573, title = {Final report: Computerized adaptive measurement of achievement and ability}, year = {1985}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J.} } @article {608, title = {Implications for altering the context in which test items appear: A historical perspective on an immediate concern}, journal = {Review of Educational Research}, volume = {55}, year = {1985}, pages = {387-413}, author = {Leary, L. F. and Dorans, N. J.} } @inbook {1947, title = {Introduction}, year = {1985}, address = {In D. J. Weiss (Ed.), New horizons in testing: Latent trait test theory and computerized adaptive testing (pp. 1-8). New York: Academic Press.}, author = {Weiss, D. J.} } @article {729, title = {Latent structure and item sampling models for testing}, journal = {Annual Review of Psychology}, volume = {36}, year = {1985}, pages = {19-48}, author = {Traub, R. E. and Lam, Y. 
R.} } @conference {1287, title = {Methods of selecting successive items in adaptive testing}, booktitle = {Unpublished manuscript}, year = {1985}, address = {University of Pittsburgh}, author = {Yu, L.} } @article {552, title = {Monitoring item calibrations from data yielded by an adaptive testing procedure}, journal = {Educational Research Quarterly}, volume = {10}, year = {1985}, pages = {9-12}, author = {Garrison, W. M.} } @book {1721, title = {Proceedings of the 1982 Computerized Adaptive Testing Conference}, year = {1985}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J.} } @inbook {1955, title = {Reducing the predictability of adaptive item sequences}, year = {1985}, address = {Proceedings of the 27th Annual Conference of the Military Testing Association, San Diego, 43-48.}, author = {Wetzel, C. D. and J. R. McBride} } @book {1699, title = {Sequential analysis: Tests and confidence intervals}, year = {1985}, address = {New York: Springer-Verlag}, author = {Siegmund, D.} } @article {91, title = {A structural comparison of conventional and adaptive versions of the ASVAB}, journal = {Multivariate Behavioral Research}, volume = {20}, number = {3}, year = {1985}, note = {Lawrence Erlbaum, US}, pages = {305-322}, abstract = {Examined several structural models of similarity between the Armed Services Vocational Aptitude Battery (ASVAB) and a battery of computerized adaptive tests designed to measure the same aptitudes. 12 plausible models were fitted to sample data in a double cross-validation design. 1,411 US Navy recruits completed 10 ASVAB subtests. A computerized adaptive test version of the ASVAB subtests was developed on item pools of approximately 200 items each. The items were pretested using applicants from military entrance processing stations across the US, resulting in a total calibration sample size of approximately 60,000 for the computerized adaptive tests. Three of the 12 models provided reasonable summaries of the data. One model with a multiplicative structure (M. W. Browne; see record 1984-24964-001) performed quite well. This model provides an estimate of the disattenuated method correlation between conventional testing and adaptive testing. In the present data, this correlation was estimated to be 0.97 and 0.98 in the 2 halves of the data. Results support computerized adaptive tests as replacements for conventional tests. (33 ref) (PsycINFO Database Record (c) 2004 APA, all rights reserved).}, author = {Cudeck, R.} } @proceedings {271, title = {Unidimensional and multidimensional models for item response theory}, journal = {Proceedings of the 1982 Computerized Adaptive Testing Conference}, year = {1985}, month = {06/1982}, pages = {127-148}, publisher = {University of Minnesota, Department of Psychology, Psychometrics Methods Program}, address = {Minneapolis, MN. USA}, author = {McDonald, R. P.} } @conference {1193, title = {Validity of adaptive testing: A summary of research results}, booktitle = {Paper presented at the annual meeting of the American Psychological Association.}, year = {1985}, note = {$\#$SY85-01}, author = {Sympson, J. B. and Moreno, K. E.} } @conference {1067, title = {A validity study of the computerized adaptive testing version of the Armed Services Vocational Aptitude Battery}, booktitle = {Proceedings of the 27th Annual Conference of the Military Testing Association}, year = {1985}, author = {Moreno, K. E. and Segall, D. O. and Kieckhaefer, W. 
F.} } @book {1678, title = {Adaptive self-referenced testing as a procedure for the measurement of individual change in instruction: A comparison of the reliabilities of change estimates obtained from conventional and adaptive testing procedures}, year = {1984}, address = {Unpublished doctoral dissertation, Univerity of Minnesota, Minneapolis}, author = {Kingsbury, G. G.} } @booklet {1517, title = {Adaptive testing (Final Report Contract OPM-29-80)}, year = {1984}, address = {Urbana-Champaign IL: University of Illinois, Aviation Research Laboratory}, author = {Trollip, S. R.} } @booklet {1308, title = {Analysis of experimental CAT ASVAB data}, year = {1984}, address = {Baltimore MD: Johns Hopkins University, Department of Psychology}, author = {Allred, L. A and Green, B. F.} } @booklet {1388, title = {Analysis of speeded test data from experimental CAT system}, year = {1984}, address = {Baltimore MD: Johns Hopkins University, Department of Psychology}, author = {Greaud, V. A., and Green, B. F.} } @booklet {1513, title = {Application of adaptive testing to a fraction test (Research Report 84-3-NIE)}, year = {1984}, address = {Urbana IL: Univerity of Illinois, Computer-Based Education Research Laboratory}, author = {Tatsuoka, K. K. and Tatsuoka, M. M. and Baillie, R.} } @article {2014, title = {Bias and Information of Bayesian Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {8}, year = {1984}, pages = {273-285}, author = {Weiss, D. J. and J. R. McBride} } @article {801, title = {Bias and information of Bayesian adaptive testing}, journal = {Applied Psychological Measurement}, volume = {8}, year = {1984}, pages = {273-285}, author = {Weiss, D. J. and J. R. McBride} } @book {1648, title = {A comparison of the maximum likelihood strategy and stradaptive test on a micro-computer}, year = {1984}, note = {$\#$BI84-01}, address = {Unpublished M.S. thesis, University of Wisconsin, Madison.}, author = {Bill, B. C.} } @article {708, title = {Computerized adaptive testing in the Maryland Public Schools}, journal = {MicroCAT News}, volume = {1}, year = {1984}, pages = {1}, author = {Stevenson, J.} } @article {649, title = {Computerized diagnostic testing}, journal = {Journal of Educational Measurement}, volume = {21}, year = {1984}, pages = {391-397}, author = {MCArthur , D.L. and Choppin, B. H.} } @conference {1041, title = {The design of a computerized adaptive testing system for administering the ASVAB}, booktitle = {Presentation at the Annual Meeting of the American Educational Research Association}, year = {1984}, address = {New Orleans, LA}, author = {J. R. McBride} } @booklet {1451, title = {Efficiency and precision in two-stage adaptive testing}, year = {1984}, address = {West Palm Beach Florida: Eastern ERA}, author = {Loyd, B.H.} } @booklet {1400, title = {Evaluation of computerized adaptive testing of the ASVAB}, year = {1984}, address = {San Diego, CA: Navy Personnel Research and Development Center, unpublished manuscript}, author = {Hardwicke, S. and Vicino, F. and J. R. McBride and Nemeth, C.} } @conference {1218, title = {An evaluation of the utility of large scale computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1984}, address = {Chicago}, author = {Vicino, F. L. and Hardwicke, S. 
B.} } @conference {1217, title = {An evaluation of the utility of large scale computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1984}, address = {New Orleans LA}, author = {Vicino, F. L. and Hardwicke, S. B.} } @booklet {1389, title = {Evaluation plan for the computerized adaptive vocational aptitude battery (Research Report 82-1)}, year = {1984}, note = {Baltimore MD: The Johns Hopkins University, Department of Psychology.}, author = {Green, B. F. and Bock, R. D. and Humphreys, L. G. and Linn, R. L. and Reckase, M. D.} } @article {644, title = {Issues in item banking}, journal = {Journal of Educational Measurement}, volume = {1}, year = {1984}, pages = {315-330}, author = {Millman, J. and Arter, J.A.} } @article {2015, title = {Item Location Effects and Their Implications for IRT Equating and Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {8}, year = {1984}, pages = {147-154}, author = {Kingston, N. M. and Dorans, N. J.} } @booklet {1498, title = {Microcomputer network for computerized adaptive testing (CAT) (TR-84-33)}, year = {1984}, address = {San Diego CA: Navy Personnel Research and Development Center}, author = {Quan, B. and Park, T. A. and Sandahl, G. and Wolfe, J. H.} } @article {561, title = {A plan for scaling the computerized adaptive Armed Services Vocational Aptitude Battery}, journal = {Journal of Educational Measurement}, volume = {21}, year = {1984}, pages = {347-360}, author = {Green, B. F. and Bock, B. D., and Linn, R. L. and Lord, F. M., and Reckase, M. D.} } @conference {1194, title = {Predictive validity of computerized adaptive testing in a military training environment}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1984}, address = {New Orleans LA}, author = {Sympson, J. B. and Weiss, D. J. and Ree, M. J.} } @article {285, title = {Relationship between corresponding Armed Services Vocational Aptitude Battery (ASVAB) and computerized adaptive testing (CAT) subtests}, journal = {Applied Psychological Measurement}, volume = {8}, number = {2}, year = {1984}, note = {Sage Publications, US}, pages = {155-163}, abstract = {Investigated the relationships between selected subtests from the Armed Services Vocational Aptitude Battery (ASVAB) and corresponding subtests administered as computerized adaptive tests (CATs), using 270 17-26 yr old Marine recruits as Ss. Ss were administered the ASVAB before enlisting and approximately 2 wks after entering active duty, and the CAT tests were administered to Ss approximately 24 hrs after arriving at the recruit depot. Results indicate that 3 adaptive subtests correlated as well with ASVAB as did the 2nd administration of the ASVAB, although CAT subtests contained only half the number of items. Factor analysis showed CAT subtests to load on the same factors as the corresponding ASVAB subtests, indicating that the same abilities were being measured. It is concluded that CAT can achieve the same measurement precision as a conventional test, with half the number of items. (16 ref) }, keywords = {computerized adaptive testing}, author = {Moreno, K. E. and Wetzel, C. D. and J. R. McBride and Weiss, D. 
J.} } @article {2013, title = {Relationship Between Corresponding Armed Services Vocational Aptitude Battery (ASVAB) and Computerized Adaptive Testing (CAT) Subtests}, journal = {Applied Psychological Measurement}, volume = {8}, year = {1984}, pages = {155-163}, author = {Moreno, K. E. and Wetzel, C. D. and J. R. McBride and Weiss, D. J.} } @conference {1176, title = {The selection of items for decision making with a computer adaptive test}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1984}, address = {New Orleans LA}, author = {Spray, J. A. and Reckase, M. D.} } @article {160, title = {Technical guidelines for assessing computerized adaptive tests}, journal = {Journal of Educational Measurement}, volume = {21}, number = {4}, year = {1984}, pages = {347-360}, keywords = {computerized adaptive testing, Mode effects, paper-and-pencil}, isbn = {1745-3984}, author = {Green, B. F. and Bock, R. D. and Humphreys, L. G. and Linn, R. L. and Reckase, M. D.} } @booklet {1610, title = {Two simulated feasibility studies in computerized adaptive testing (RR-84-15)}, year = {1984}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @book {1642, title = {Users manual for the MicroCAT Testing System}, year = {1984}, address = {St. Paul MN: Author}, author = {Assessment-Systems-Corporation} } @article {788, title = {Using microcomputers to administer tests}, journal = {Educational Measurement: Issues and Practice}, volume = {3(2)}, year = {1984}, pages = {16-20}, author = {W. C. Ward} } @article {643, title = {Using microcomputers to administer tests: An alternate point of view}, journal = {Educational Measurement: Issues and Practice}, volume = {3(2)}, year = {1984}, pages = {20-21}, author = {Millman, J.} } @inbook {1781, title = {Adaptive testing by computer}, year = {1983}, address = {R. B. Ekstrom (ed.), Measurement, technology, and individuality in education. New directions for testing and measurement, Number 17. San Francisco: Jossey-Bass.}, author = {Green, B. F.} } @booklet {1418, title = {Alternate forms reliability and concurrent validity of adaptive and conventional tests with military recruits}, year = {1983}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Kiely, G. L. and A Zara and Weiss, D. J.} } @article {693, title = {An application of computerized adaptive testing in U. S. Army recruiting.}, journal = {Journal of Computer-Based Instruction}, volume = {10}, year = {1983}, pages = {87-89}, author = {Sands, W. A. and Gade, P. A.} } @booklet {1575, title = {Bias and information of Bayesian adaptive testing (Research Report 83-2)}, year = {1983}, note = {{PDF file, 1.066MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J. and J. R. McBride} } @inbook {1812, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait test theory and computerized adaptive testing (pp. 257-283). New York: Academic Press.}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1950, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait theory and computerized adaptive testing (pp. 
1-8). New York: Academic Press.}, author = {Kingsbury, G.G. and Weiss, D. J.} } @inbook {226, title = {A comparison of IRT-based adaptive mastery testing and a sequential mastery testing procedure.}, booktitle = {New horizons in testing: Latent trait test theory and computerized adaptive testing}, year = {1983}, pages = {258-283}, publisher = {Academic Press.}, organization = {Academic Press.}, address = {New York, NY. USA}, author = {Kingsbury, G. G. and Weiss, D. J.} } @book {1681, title = {Effects of item parameter error and other factors on trait estimation in latent trait based adaptive testing}, year = {1983}, note = {Dissertation Abstracts International, 44(3-B), 944.}, address = {Unpublished doctoral dissertation, University of Minnesota}, author = {Mattson, J. D.} } @booklet {1474, title = {An evaluation of one- and three-parameter logistic tailored testing procedures for use with small item pools (Research Report ONR83-1)}, year = {1983}, address = {Iowa City IA: American College Testing Program}, author = {McKinley, R. L. and Reckase, M. D.} } @booklet {1572, title = {Final report: Computer-based measurement of intellectual capabilities}, year = {1983}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J.} } @booklet {1576, title = {Influence of fallible item parameters on test information during adaptive testing (Tech Rep 83-15).}, year = {1983}, note = {$\#$WE83-15}, address = {San Diego CA: Navy Personnel Research and Development Center.}, author = {Wetzel, C. D. and J. R. McBride} } @article {774, title = {On item response theory and computerized adaptive testing: The coming technical revolution in testing}, journal = {Journal of College Admissions}, volume = {28}, year = {1983}, pages = {9-16}, author = {Wainer, H.,} } @book {1720, title = {New horizons in testing: Latent trait test theory and computerized adaptive testing}, year = {1983}, address = {New York: Academic Press}, author = {Weiss, D. J.} } @inbook {1915, title = {The person response curve: Fit of individuals to item response theory models}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait test theory and computerized adaptive testing (pp. 83-108). New York: Academic Press.}, author = {Trabin, T. E. and Weiss, D. J.} } @booklet {1401, title = {Predictive utility evaluation of adaptive testing: Results of the Navy research}, year = {1983}, address = {Falls Church VA: The Rehab Group Inc}, author = {Hardwicke, S. and White, K. E.} } @inbook {324, title = {A procedure for decision making using tailored testing.}, booktitle = {New horizons in testing: Latent trait theory and computerized adaptive testing }, year = {1983}, pages = {237-254}, publisher = {Academic Press}, organization = {Academic Press}, address = {New York, NY. USA}, keywords = {CCAT, CLASSIFICATION Computerized Adaptive Testing, sequential probability ratio testing, SPRT}, author = {Reckase, M. D.} } @inbook {1780, title = {The promise of tailored tests}, year = {1983}, address = {H. Wainer and S. Messick (Eds.). Principals of modern psychological measurement (pp. 69-80). Hillsdale NJ: Erlbaum.}, author = {Green, B. F.} } @booklet {1481, title = {Relationship between corresponding Armed Services Vocational Aptitude Battery (ASVAB) and computerized adaptive testing (CAT) subtests (TR 83-27)}, year = {1983}, address = {San Diego CA: Navy Personnel Research and Development Center}, author = {Moreno, K. E. and Wetzel, D. C. and J. R. 
McBride and Weiss, D. J.} } @booklet {1469, title = {Reliability and validity of adaptive ability tests in a military recruit population (Research Report 83-1)}, year = {1983}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program, Computerized Testing Laboratory}, author = {J. R. McBride and Martin, J. T. and Weiss, D. J.} } @inbook {1851, title = {Reliability and validity of adaptive ability tests in a military setting}, year = {1983}, address = {D. J. Weiss (Ed.), New horizons in testing: Latent trait test theory and computerized adaptive testing (pp. 224-236). New York: Academic Press.}, author = {J. R. McBride and Martin, J. T.} } @booklet {1456, title = {Reliability and validity of adaptive vs. conventional tests in a military recruit population (Research Rep. No. 83-1).}, year = {1983}, note = {{PDF file, 2.787 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Martin, J. T. and J. R. McBride and Weiss, D. J.} } @inbook {257, title = {Small N justifies Rasch model}, booktitle = {New horizons in testing: Latent trait test theory and computerized adaptive testing}, year = {1983}, pages = {51-61}, publisher = {Academic Press}, organization = {Academic Press}, address = {New York, NY. USA}, author = {Lord, F. M.,}, editor = {Bock, R. D.} } @book {1704, title = {The stochastic modeling of elementary psychological processes}, year = {1983}, address = {Cambridge: Cambridge University Press}, author = {Townsend, J. T. and Ashby, G. F.} } @booklet {1571, title = {The stratified adaptive computerized ability test (Research Report 73-3)}, year = {1983}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J.} } @booklet {1524, title = {Tailored testing, its theory and practice. Part I: The basic model, the normal ogive submodels, and the tailored testing algorithm (NPRDC TR-83-00)}, year = {1983}, address = {San Diego CA: Navy Personnel Research and Development Center}, author = {Urry, V. W. and Dorans, N. J.} } @article {303, title = {Ability measurement, test bias reduction, and psychological reactions to testing as a function of computer adaptive testing versus conventional testing}, journal = {Dissertation Abstracts International}, volume = {42}, number = {10-B}, year = {1982}, pages = {4233}, keywords = {computerized adaptive testing}, author = {Orban, J. A.} } @article {485, title = {Adaptive EAP estimation of ability in a microcomputer environment}, journal = {Applied Psychological Measurement}, volume = {6}, year = {1982}, pages = {431-444}, author = {Bock, B. D., and Mislevy, R. J.} } @booklet {1518, title = {An adaptive Private Pilot Certification Exam}, year = {1982}, address = {Aviation, Space, and Environmental Medicine}, author = {Trollip, S. R. and Anderson, R. I.} } @conference {925, title = {Assessing mathematics achievement with a tailored testing program}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1982}, note = {$\#$GA82-01}, address = {New York}, author = {Garrison, W. M. and Baumgarten, B. S.} } @article {797, title = {Automated tailored testing using Raven{\textquoteright}s Matrices and the Mill Hill vocabulary tests}, journal = {International Journal of Man-Machine Studies}, volume = {17}, year = {1982}, pages = {331-344}, author = {Watts, K., and Baddeley, A. D. 
and Williams, M.} } @article {2209, title = {Comparison of live and simulated adaptive tests}, number = {AFHRL-TP-82-35}, year = {1982}, month = {December 1982}, institution = {Air Force Systems Command}, address = {Brooks Air Force Base, Texas}, author = {Hunter, D. R.} } @booklet {1465, title = {Computerized adaptive testing project: Objectives and requirements (Tech Note 82-22)}, year = {1982}, note = {$\#$McB82-22}, address = {San Diego CA: Navy Personnel Research and Development Center. (AD A118 447)}, author = {J. R. McBride} } @booklet {1355, title = {Computerized adaptive testing system design: Preliminary design considerations (Tech. Report 82-52)}, year = {1982}, address = {San Diego CA: Navy Personnel Research and Development Center. (AD A118 495)}, author = {Croll, P. R.} } @booklet {1466, title = {Computerized Adaptive Testing system development and project management.}, year = {1982}, address = {Minutes of the ASVAB (Armed Services Vocational Aptitude Battery) Steering Committee. Washington, DC: Office of the Assistant Secretary of Defense (Manpower, Reserve Affairs and Logistics), Accession Policy Directorate.}, author = {J. R. McBride} } @inbook {1852, title = {The computerized adaptive testing system development project}, year = {1982}, note = {{PDF file, 296 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1982 Item Response Theory and Computerized Adaptive Testing Conference (pp. 342-349). Minneapolis: University of Minnesota, Department of Psychology.}, author = {J. R. McBride and Sympson, J. B.} } @inbook {1957, title = {Computerized testing in the German Federal Armed Forces (FAF): Empirical approaches}, year = {1982}, note = {PDF file, 384 K}, address = {D. J. Weiss (Ed.), Proceedings of the 1982 Item Response Theory and Computerized Adaptive Testing Conference (pp. 353-359). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Wildgrube, W.} } @inbook {1919, title = {Design of a Microcomputer-Based Adaptive Testing System}, year = {1982}, note = {{PDF file, 697 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Item Response Theory and Computerized Adaptive Testing Conference (pp. 360-371). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Vale, C. D.} } @conference {1040, title = {Development of a computerized adaptive testing system for enlisted personnel selection}, booktitle = {Presented at the Annual Convention of the American Psychological Association}, year = {1982}, address = {Washington, DC}, author = {J. R. McBride} } @inbook {1876, title = {Discussion: Adaptive and sequential testing}, year = {1982}, note = {{PDF file, 288 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1982 Computerized Adaptive Testing Conference (pp. 290-294). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Reckase, M. D.} } @article {2012, title = {Improving Measurement Quality and Efficiency with Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {6}, year = {1982}, pages = {473-492}, author = {Weiss, D. J.} } @inbook {1903, title = {Item Calibrations for Computerized Adaptive Testing (CAT) Experimental Item Pools Adaptive Testing}, year = {1982}, note = {PDF file, 105 K}, address = {D. J. Weiss (Ed.). Proceedings of the 1982 Computerized Adaptive Testing Conference (pp. 290-294). 
Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Sympson, J. B. and Hartmann, L.} } @conference {1247, title = {Legal and political considerations in large-scale adaptive testing}, booktitle = {Paper presented at the 23rd conference of the Military Testing Association.}, year = {1982}, author = {B. K. Waters and Lee, G. C.} } @booklet {1632, title = {Predictive validity of conventional and adaptive tests in an Air Force training environment (Report AFHRL-TR-81-40)}, year = {1982}, note = {$\#$SY82-01}, address = {Brooks Air Force Base TX: Air Force Human Resources Laboratory, Manpower and Personnel Division}, author = {Sympson, J. B. and Weiss, D. J. and Ree, M. J.} } @article {769, title = {Pros and cons of tailored testing: An examination of issues highlighted with an automated testing system}, journal = {International Journal of Man-Machine Studies}, volume = {17}, year = {1982}, pages = {301-304}, author = {Volans, P. J.} } @inbook {1951, title = {Robustness of adaptive testing to multidimensionality}, year = {1982}, note = {{PDF file, 1.42 MB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1982 Item Response Theory and Computerized Adaptive Testing Conference. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Weiss, D. J. and Suhadolnik, D.} } @article {803, title = {Sequential testing for selection}, journal = {Applied Psychological Measurement}, volume = {6}, year = {1982}, pages = {337-351}, author = {Weitzman, R. A.} } @article {2011, title = {Sequential Testing for Selection}, journal = {Applied Psychological Measurement}, volume = {6}, year = {1982}, pages = {337-351}, author = {Weitzman, R. A.} } @inbook {1953, title = {Use of Sequential Testing to Prescreen Prospective Entrants to Military Service.}, year = {1982}, note = {{PDF file, 483 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1982 Item Response Theory and Computerized Adaptive Testing Conference. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Weitzman, R. A.} } @book {1688, title = {Ability measurement, test bias reduction, and psychological reactions to testing as a function of computer adaptive testing versus conventional}, year = {1981}, note = {$\#$OR81-01}, address = {Unpublished doctoral dissertation, Virginia Polytechnic Institute and State University. Dissertation Abstracts International, 1982, 42(10-B), 4233}, author = {Orban, J. A.} } @booklet {1382, title = {Adaptive testing without a computer}, year = {1981}, address = {Catalog of Selected Documents in Psychology, Nov 1981, 11, 74-75 (Ms. No. 2350). AFHRL Technical Report 80-66.}, author = {Friedman, D. and Steinberg, A. and Ree, M. J.} } @article {2227, title = {A comparison of a Bayesian and a maximum likelihood tailored testing procedure}, year = {1981}, institution = {University of Missouri, Department of Educational Psychology, Tailored Testing Research Laboratory}, type = {Technical report}, address = {Columbia MO}, author = {McKinley, R. L., and Reckase, M. D.} } @conference {1131, title = {A comparison of a maximum likelihood and a Bayesian estimation procedure for tailored testing}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1981}, note = {$\#$RO81-01}, address = {Los Angeles CA}, author = {Rosso, M. A. and Reckase, M.
D.} } @booklet {1484, title = {A comparison of two methods of interactive testing Final report.}, year = {1981}, address = {National Institute of Education Grant 79-1045}, author = {Nicewander, W. A. and Chang, H. S. and Doody, E. N.} } @article {734, title = {Design and implementation of a microcomputer-based adaptive testing system}, journal = {Behavior Research Methods and Instrumentation}, volume = {13}, year = {1981}, pages = {399-406}, author = {Vale, C. D.} } @book {1660, title = {Effect of error in item parameter estimates on adaptive testing (Doctoral dissertation, University of Minnesota)}, year = {1981}, note = {(University Microfilms No. AAD81-25946)}, address = {Dissertation Abstracts International, 42, 06-B}, author = {Crichton, L. I.} } @article {2010, title = {The Effects of Item Calibration Sample Size and Item Pool Size on Adaptive Testing}, journal = {Applied Psychological Measurement}, volume = {5}, year = {1981}, pages = {11-19}, author = {Ree, M. J.} } @booklet {1457, title = {Factors influencing the psychometric characteristics of an adaptive testing strategy for test batteries (Research Rep. No. 81-4)}, year = {1981}, note = {{PDF file, 1.689 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Maurelli, V. A. and Weiss, D. J.} } @booklet {1570, title = {Final report: Computerized adaptive ability testing}, year = {1981}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J.} } @booklet {1500, title = {Final report: Procedures for criterion referenced tailored testing}, year = {1981}, address = {Columbia: University of Missouri, Educational Psychology Department}, author = {Reckase, M. D.} } @article {813, title = {Optimal item difficulty for the three-parameter normal ogive model}, journal = {Psychometrika}, volume = {46}, year = {1981}, pages = {461-464}, author = {Wolfe, J. H.} } @booklet {1523, title = {Tailored testing, its theory and practice. Part II: Ability and item parameter estimation, multiple ability application, and allied procedures (NPRDC TR-81)}, year = {1981}, note = {Part II: Ability and item parameter estimation, multiple ability application, and allied procedures (NPRDC TR-81)}, address = {San Diego CA: Navy Personnel Research and Development Center}, author = {Urry, V. W.} } @booklet {1501, title = {The use of the sequential probability ratio test in making grade classifications in conjunction with tailored testing (Research Report 81-4)}, year = {1981}, address = {Columbia MO: University of Missouri, Department of Educational Psychology}, author = {Reckase, M. D.} } @booklet {1423, title = {A validity comparison of adaptive and conventional strategies for mastery testing (Research Report 81-3)}, year = {1981}, note = {{PDF file, 1.855 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1846, title = {Adaptive verbal ability testing in a military setting}, year = {1980}, note = {{PDF file, 635 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 4-15). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {J. R. 
McBride} } @booklet {1422, title = {An alternate-forms reliability and concurrent validity comparison of Bayesian adaptive and conventional ability tests (Research Report 80-5)}, year = {1980}, note = {{PDF file, 1.11 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @book {1671, title = {A comparative evaluation of two Bayesian adaptive ability estimation procedures}, year = {1980}, address = {Unpublished doctoral dissertation, the Catholic University of America}, author = {Gorman, S.} } @mastersthesis {2221, title = {A comparative evaluation of two Bayesian adaptive ability estimation procedures with a conventional test strategy}, volume = {Ph.D.}, year = {1980}, school = {Catholic University of America}, address = {Washington DC}, author = {Gorman, S.} } @booklet {1421, title = {A comparison of adaptive, sequential, and conventional testing strategies for mastery decisions (Research Report 80-4)}, year = {1980}, note = {{PDF file, 1.905 MB}}, address = {Minneapolis, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1811, title = {A comparison of ICC-based adaptive mastery testing and the Waldian probability ratio method}, year = {1980}, note = {51 MB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 120-139). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Kingsbury, G. G. and Weiss, D. J.} } @inbook {1777, title = {A comparison of the accuracy of Bayesian adaptive and static tests using a correction for regression}, year = {1980}, note = {{PDF file, 735 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 35-50). Minneapolis MN: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory.}, author = {Gorman, S.} } @article {613, title = {Computer applications in audiology and rehabilitation of the hearing impaired}, journal = {Journal of Communication Disorders}, volume = {13}, year = {1980}, pages = {471-481}, author = {Levitt, H.} } @article {639, title = {Computer applications to ability testing}, journal = {Association for Educational Data Systems Journal}, volume = {13}, year = {1980}, pages = {193-203}, author = {McKinley, R. L., and Reckase, M. D.} } @booklet {1417, title = {Computerized instructional adaptive testing model: Formulation and validation (AFHRL-TR-79-33, Final Report)}, year = {1980}, address = {Brooks Air Force Base TX: Air Force Human Resources Laboratory", Also Catalog of Selected Documents in Psychology, February 1981, 11, 20 (Ms. No, 2217) }, author = {Kalisch, S. J.} } @inbook {1956, title = {Computerized testing in the German Federal Armed Forces (FAF)}, year = {1980}, note = {{PDF file, 595 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Item Response Theory and Computerized Adaptive Testing Conference (pp. 68-77). 
Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Wildgrube, W.} } @booklet {1516, title = {Criterion-related validity of adaptive testing strategies (Research Report 80-3)}, year = {1980}, note = {$\#$TH80-03 {PDF file, 2.708 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Thompson, J. G. and Weiss, D. J.} } @book {1705, title = {Development and evaluation of an adaptive testing strategy for use in multidimensional interest assessment}, year = {1980}, address = {Unpublished doctoral dissertation, University of Minnesota. Dissertation Abstracts International, 42(11-B), 4248-4249}, author = {Vale, C. D.} } @inbook {1944, title = {Discussion: Session 1}, year = {1980}, note = {$\#$WA80-01 {PDF file, 283 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 51-55). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {B. K. Waters} } @inbook {1861, title = {Discussion: Session 3}, year = {1980}, note = {{PDF file, 286 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Item Response Theory and Computerized Adaptive Testing Conference (pp. 140-143). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Novick, M. R.} } @booklet {1494, title = {Effects of computerized adaptive testing on Black and White students (Research Report 79-2)}, year = {1980}, note = {{PDF file, 2.323 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Pine, S. M. and Church, A. T. and Gialluca, K. A. and Weiss, D. J.} } @conference {1089, title = {Effects of program parameters and item pool characteristics on the bias of a three-parameter tailored testing procedure}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1980}, address = {Boston MA, USA}, author = {Patience, W. M. and Reckase, M. D.} } @booklet {1427, title = {An empirical study of a broad range test of verbal ability}, year = {1980}, address = {Princeton NJ: Educational Testing Service}, author = {Kreitzberg, C. B. and Jones, D. J.} } @conference {1192, title = {Estimating the reliability of adaptive tests from a single test administration}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1980}, note = {1981 draft copy available. {PDF File, 7,603 KB}}, address = {Boston}, author = {Sympson, J. B.} } @booklet {1569, title = {Final report: Computerized adaptive performance evaluation}, year = {1980}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J.} } @booklet {1505, title = {Final Report: Computerized adaptive testing, assessment of requirements}, year = {1980}, address = {Falls Church VA: Author}, author = {Rehab-Group-Inc.} } @article {2009, title = {Implied Orders Tailored Testing: Simulation with the Stanford-Binet}, journal = {Applied Psychological Measurement}, volume = {4}, year = {1980}, pages = {157-163}, author = {Cudeck, R. and McCormick, D. J. and N.
Cliff} } @article {521, title = {Implied orders tailored testing: Simulation with the Stanford-Binet}, journal = {Applied Psychological Measurement}, volume = {4}, year = {1980}, pages = {157-163}, author = {Cudeck, R. and McCormick, D. and Cliff, N. A.} } @inbook {1767, title = {Individualized testing on the basis of the Rasch model}, year = {1980}, address = {In J. Th. Van der Kamp, W. F. Langerak, and D. N. M. de Gruijter (Eds.). Psychometrics for educational debates. New York: Wiley.}, author = {Fischer, G. H. and Pendl, P.} } @inbook {1804, title = {A model for computerized adaptive testing related to instructional situations}, year = {1980}, note = {{PDF file, 965 KB}}, address = {D. J. Weiss (Ed.). Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 101-119). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Kalisch, S. J.} } @article {660, title = {Operational characteristics of a one-parameter tailored testing procedure}, journal = {Catalog of Selected Documents in Psychology}, volume = {August 1980}, year = {1980}, note = {No. 2104).}, pages = {10, 66 (Ms No. 2104)}, author = {Patience, W. M., and Reckase, M. D.} } @inbook {1802, title = {Parallel forms reliability and measurement accuracy comparison of adaptive and conventional testing strategies}, year = {1980}, note = {{PDF file, 918 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 16-34). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Johnson, M. J. and Weiss, D. J.} } @book {1719, title = {Proceedings of the 1979 Computerized Adaptive Testing Conference}, year = {1980}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J.} } @article {651, title = {A simple form of tailored testing}, journal = {British Journal of Educational Psychology}, volume = {50}, year = {1980}, pages = {301-303}, author = {Nisbet, J. and Adams, M. and Arthur, J.} } @inbook {1875, title = {Some decision procedures for use with tailored testing}, year = {1980}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 79-100). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Reckase, M. D.} } @inbook {1831, title = {Some how and which for practical tailored testing}, year = {1980}, address = {L. J. T. van der Kamp, W. F. Langerak and D.N.M. de Gruijter (Eds): Psychometrics for educational debates (pp. 189-206). New York: John Wiley and Sons. Computer-Assisted Instruction, Testing, and Guidance (pp. 139-183). New York: Harper and Row.}, author = {Lord, F. M.,} } @booklet {1473, title = {A successful application of latent trait theory to tailored achievement testing (Research Report 80-1)}, year = {1980}, address = {University of Missouri, Department of Educational Psychology, Tailored Testing Research Laboratory}, author = {McKinley, R. L. and Reckase, M. D.} } @inbook {1794, title = {A validity study of an adaptive test of reading comprehension}, year = {1980}, note = {{PDF file, 676 KB}}, address = {D. J. Weiss (Ed.), Proceedings of the 1979 Computerized Adaptive Testing Conference (pp. 57-67). 
Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Hornke, L. F. and Sauter, M. B.} } @booklet {1463, title = {Adaptive mental testing: The state of the art (Technical Report 423)}, year = {1979}, address = {Alexandria VA: U.S. Army Research Institute for the Behavioral and Social Sciences.}, author = {J. R. McBride} } @booklet {1420, title = {An adaptive testing strategy for mastery decisions (Research Report 79-5)}, year = {1979}, note = {{PDF file, 2.146 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Kingsbury, G. G. and Weiss, D. J.} } @booklet {1461, title = {Adaptive tests{\textquoteright} usefulness for military personnel screening}, year = {1979}, address = {In M. Wiskoff, Chair, Military Applications of Computerized Adaptive Testing. Symposium presented at the Annual Convention of the American Psychological Association, New York.}, author = {J. R. McBride} } @booklet {1491, title = {Bayesian sequential design and analysis of dichotomous experiments with special reference to mental testing}, year = {1979}, address = {Princeton NJ: Educational Testing Service}, author = {Owen, R. J.} } @article {565, title = {A comparison of a standard and a computerized adaptive paradigm in Bekesy fixed-frequency audiometry}, journal = {Journal of Auditory Research}, volume = {19}, year = {1979}, pages = {1-22}, author = {Harris, J. D. and Smith, P. F.} } @booklet {1464, title = {Computerized adaptive testing: The state of the art (ARI Technical Report 423)}, year = {1979}, address = {Alexandria, VA: U.S. Army Research Institute for the Behavioral and Social Sciences.}, author = {J. R. McBride} } @conference {1191, title = {Criterion-related validity of conventional and adaptive tests in a military environment}, booktitle = {Paper presented at the 1979 Computerized Adaptive Testing Conference}, year = {1979}, address = {Minneapolis MN}, author = {Sympson, J. B.} } @booklet {1512, title = {The danger of relying solely on diagnostic adaptive testing when prior and subsequent instructional methods are different (CERL Report E-5)}, year = {1979}, note = {$\#$TA79-01}, address = {Urbana IL: Univeristy of Illinois, Computer-Based Education Research Laboratory.}, author = {Tatsuoka, K. and Birenbaum, M.} } @article {1967, title = {Efficiency of an adaptive inter-subtest branching strategy in the measurement of classroom achievement (Research Report 79-6)}, year = {1979}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Gialluca, K. A. and Weiss, D. J.} } @booklet {1462, title = {An evaluation of computerized adaptive testing}, year = {1979}, address = {In Proceedings of the 21st Military Testing Association Conference. SanDiego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride} } @article {516, title = {Evaluation of implied orders as a basis for tailored testing with simulation data}, journal = {Applied Psychological Measurement}, volume = {3}, year = {1979}, pages = {495-514}, author = {Cliff, N. A. and McCormick, D.} } @article {2008, title = {Evaluation of Implied Orders as a Basis for Tailored Testing with Simulation Data}, journal = {Applied Psychological Measurement}, volume = {3}, year = {1979}, pages = {495-514}, author = {N. Cliff and Cudeck, R. and McCormick, D. 
J.} } @article {574, title = {Four realizations of pyramidal adaptive testing}, journal = {Programmed Learning and Educational Technology}, volume = {16}, year = {1979}, pages = {164-169}, author = {Hornke, L. F.} } @article {2007, title = {Monte Carlo Evaluation of Implied Orders As a Basis for Tailored Testing}, journal = {Applied Psychological Measurement}, volume = {3}, year = {1979}, pages = {65-74}, author = {Cudeck, R. and McCormick, D. and N. Cliff} } @article {520, title = {Monte Carlo evaluation of implied orders as a basis for tailored testing}, journal = {Applied Psychological Measurement}, volume = {3}, year = {1979}, pages = {65-74}, author = {Cudeck, R. and McCormick, D. J. and Cliff, N. A.} } @conference {1088, title = {Operational characteristics of a Rasch model tailored testing procedure when program parameters and item pool attributes are varied}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {1979}, address = {San Francisco}, author = {Patience, W. M. and Reckase, M. D.} } @booklet {1425, title = {Problems in application of latent-trait models to tailored testing (Research Report 79-1)}, year = {1979}, address = {Columbia MO: University of Missouri, Department of Psychology}, note = {Also presented at the National Council on Measurement in Education, 1979; ERIC No. ED 177 196}, author = {Koch, W. J. and Reckase, M. D.} } @book {1679, title = {The Rasch model in computerized personality testing}, year = {1979}, address = {Ph.D. dissertation, University of Missouri, Columbia, 1979}, author = {Kunce, C. S.} } @conference {960, title = {Student reaction to computerized adaptive testing in the classroom}, booktitle = {Paper presented at the 87th annual meeting of the American Psychological Association}, year = {1979}, note = {$\#$JO79-01}, address = {New York}, author = {Johnson, M. J.} } @conference {1039, title = {An adaptive test designed for paper-and-pencil testing}, booktitle = {Presentation to the convention of the Western Psychological Association}, year = {1978}, address = {San Francisco, CA}, author = {J. R. McBride} } @inbook {1845, title = {Applications of latent trait theory to criterion-referenced testing}, year = {1978}, address = {D.J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis, MN: University of Minnesota.}, author = {J. R. McBride} } @inbook {1765, title = {Applications of sequential testing procedures to performance testing}, year = {1978}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Epstein, K. I. and Knerr, C. S.} } @article {707, title = {Combining auditory and visual stimuli in the adaptive testing of speech discrimination}, journal = {Journal of Speech and Hearing Disorders}, volume = {43}, year = {1978}, pages = {115-122}, author = {Steele, J. A. and Binnie, C. A. and Cooper, W. A.} } @book {1682, title = {A comparison of Bayesian and maximum likelihood scoring in a simulated stradaptive test}, year = {1978}, address = {Unpublished Masters thesis, St. Mary{\textquoteright}s University of Texas, San Antonio TX}, author = {Maurelli, V. 
A.} } @booklet {1493, title = {A comparison of the fairness of adaptive and conventional testing strategies (Research Report 78-1)}, year = {1978}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Pine, S. M. and Weiss, D. J.} } @article {694, title = {Computer-assisted tailored testing: Examinee reactions and evaluation}, journal = {Educational and Psychological Measurement}, volume = {38}, year = {1978}, pages = {265-273}, author = {Schmidt, F. L. and Urry, V. W. and Gugel, J. F.} } @article {601, title = {Computerized adaptive testing: Principles and directions}, journal = {Computers and Education}, volume = {2 (4)}, year = {1978}, pages = {319-329}, author = {Kreitzberg, C. B.} } @article {602, title = {Computerized adaptive testing: Principles and directions}, journal = {Computers and Education}, volume = {2}, year = {1978}, pages = {319-329}, author = {Kreitzberg, C. B. and Stocking, M., and Swanson, L.} } @booklet {1324, title = {A construct validation of adaptive achievement testing (Research Report 78-4)}, year = {1978}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Bejar, I. I. and Weiss, D. J.} } @booklet {1353, title = {Evaluations of implied orders as a basis for tailored testing using simulations (Technical Report No. 4)}, year = {1978}, note = {$\#$CL77-04}, address = {Los Angeles CA: University of Southern California, Department of Psychology.}, author = {Cliff, N. A. and Cudeck, R. and McCormick, D.} } @conference {1108, title = {A generalization of sequential analysis to decision making with tailored testing}, booktitle = {Paper presented at the meeting of the Military Testing Association}, year = {1978}, address = {Oklahoma City OK}, author = {Reckase, M. D.} } @booklet {1354, title = {Implied orders as a basis for tailored testing (Technical Report No. 6)}, year = {1978}, note = {$\#$CL78-06}, address = {Los Angeles CA: University of Southern California, Department of Psychology.}, author = {Cliff, N. A. and Cudeck, R. and McCormick, D.} } @booklet {1424, title = {A live tailored testing comparison study of the one- and three-parameter logistic models (Research Report 78-1)}, year = {1978}, address = {Columbia MO: University of Missouri, Department of Psychology}, author = {Koch, W. J. and Reckase, M. D.} } @proceedings {380, title = {A model for testing with multidimensional items}, journal = {Proceedings of the 1977 Computerized Adaptive Testing Conference}, year = {1978}, month = {06/1978}, pages = {82-98}, publisher = {University of Minnesota, Department of Psychology, Psychometrics Methods Program}, address = {Minneapolis, MN. USA}, author = {Sympson, J. B.} } @inbook {1830, title = {Panel discussion: Future directions for computerized adaptive testing}, year = {1978}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Item Response Theory and Computerized adaptive conference. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory.}, author = {Lord, F. M.,} } @article {489, title = {Predictive ability of a branching test}, journal = {Educational and Psychological Measurement}, volume = {38}, year = {1978}, pages = {415-419}, author = {Brooks, S. and Hartz, M. 
A.} } @book {1718, title = {Proceedings of the 1977 Computerized Adaptive Testing Conference}, year = {1978}, address = {Minneapolis: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. J.} } @article {735, title = {The stratified adaptive ability test as a tool for personnel selection and placement}, journal = {TIMS Studies in the Management Sciences}, volume = {8}, year = {1978}, pages = {135-151}, author = {Vale, C. D. and Weiss, D. J.} } @article {2240, title = {A stratified adaptive test of verbal ability}, journal = {Japanese Journal of Educational Psychology}, volume = {26}, year = {1978}, pages = {229-238}, author = {Shiba, S. and Noguchi, H. and Haebra, T.} } @inbook {1973, title = {Adaptive Branching in a Multi-Content Achievement Test}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Pennell, R. J. and Harris, D. A.} } @inbook {1842, title = {An adaptive test of arithmetic reasoning}, year = {1977}, address = {the Proceedings of the Nineteenth Military Testing Association conference, San Antonio, TX.}, author = {J. R. McBride} } @inbook {1918, title = {Adaptive testing and the problem of classification}, year = {1977}, note = {28 MB}}, address = {D. Weiss (Ed.), Applications of computerized adaptive testing (Research Report 77-1). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Vale, C. D.} } @inbook {1970, title = {Adaptive Testing Applied to Hierarchically Structured Objectives-Based Programs}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Hambleton, R. K. and Eignor, D. R.} } @booklet {1341, title = {An adaptive testing strategy for achievement test batteries (Research Rep No 77-6)}, year = {1977}, note = {{PDF file, 2.40 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Brown, J. M and Weiss, D. J.} } @article {540, title = {Application of tailored testing to achievement measurement}, journal = {Behavior Research Methods and Instrumentation}, volume = {9}, year = {1977}, pages = {158-161}, author = {English, R. A. and Reckase, M. D. and Patience, W. M.} } @book {1675, title = {An application of the Rasch one-parameter logistic model to individual intelligence testing in a tailored testing environment}, year = {1977}, address = {Dissertation Abstracts International, 37 (9-A), 5766}, author = {Ireland, C. M.} } @inbook {1733, title = {Applications of adaptive testing in measuring achievement and performance}, year = {1977}, note = {28 MB}}, address = {D. J. Weiss (Ed.), Applications of computerized adaptive testing (Research Report 77-1). Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program. }, author = {Bejar, I. I.} } @booklet {1568, title = {Applications of computerized adaptive testing (Research Report 77-1)}, year = {1977}, note = {{PDF file, 3.228 KB}}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Weiss, D. 
J.} } @proceedings {120, title = {Applications of sequential testing procedures to performance testing}, journal = {1977 Computerized Adaptive Testing Conference}, year = {1977}, publisher = {University of Minnesota}, address = {Minneapolis, MN. USA}, author = {Epstein, K. I. and Knerr, C. S.} } @article {582, title = {Bayesian tailored testing and the influence of item bank characteristics}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {111-120}, author = {Jensema, C J} } @article {2002, title = {Bayesian Tailored Testing and the Influence of Item Bank Characteristics}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {111-120}, author = {Jensema, C J} } @inbook {1841, title = {A brief overview of adaptive testing}, year = {1977}, note = {28 MB}}, address = {D. J. Weiss (Ed.), Applications of computerized testing (Research Report 77-1). Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {J. R. McBride} } @article {2001, title = {A Broad-Range Tailored Test of Verbal Ability }, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {95-100}, author = {Lord, F M} } @article {623, title = {A broad-range tailored test of verbal ability}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {95-100}, author = {Lord, F. M.,} } @booklet {1327, title = {Calibration of an item pool for the adaptive measurement of achievement (Research Report 77-5)}, year = {1977}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Bejar, I. I. and Weiss, D. J. and Kingsbury, G. G.} } @inbook {1734, title = {A comparison of conventional and adaptive achievement testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Bejar, I. I.} } @book {1651, title = {A comparison of the classification of students by two methods of administration of a mathematics placement test}, year = {1977}, address = {Unpublished doctoral dissertation, Syracuse University, 1977}, author = {Brooks, S.} } @book {1697, title = {A computer adaptive approach to the measurement of personality variables}, year = {1977}, address = {Unpublished doctoral dissertation, University of Maryland, Baltimore}, author = {Sapinkopf, R. C.} } @article {706, title = {A computer simulation study of tailored testing strategies for objective-based instructional programs}, journal = {Educational and Psychological Measurement}, volume = {37}, year = {1977}, pages = {139-158}, abstract = {One possible way of reducing the amount of time spent testing in . objective-based instructional programs would involve the implementation of a tailored testing strategy. Our purpose was to provide some additional data on the effectiveness of various tailored testing strategies for different testing situations. The three factors of a tailored testing strategy under study with various hypothetical distributions of abilities across two learning hierarchies were test length, mastery cutting score, and starting point. Overall, our simulation results indicate that it is possible to obtain a reduction of more than 50\% in testing time without any loss in decision-making accuracy, when compared to a conventional testing procedure, by implementing a tailored testing strategy. 
In addition, our study of starting points revealed that it was generally best to begin testing in the middle of the learning hierarchy. Finally, we observed a 40\% reduction in errors of classification as the number of items for testing each objective was increased from one to five.}, author = {Spineti, J. P. and Hambleton, R. K.} } @booklet {1596, title = {Computer-assisted tailored testing: Examinee reactions and evaluation (PB-276 748)}, year = {1977}, note = {$\#$SC77-01}, address = {Washington DC: U. S. Civil Service Commission, Personnel Research and Development Center.}, author = {Schmidt, F. L. and Urry, V. W. and Gugel, J. F.} } @inbook {1979, title = {Computerized Adaptive Testing and Personnel Accessioning System Design}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Underwood, M. A.} } @inbook {1843, title = {Computerized Adaptive Testing research and development}, year = {1977}, address = {H. Taylor, Proceedings of the Second Training and Personnel Technology Conference. Washington, DC: Office of the Director of Defense Research and Engineering.}, author = {J. R. McBride} } @inbook {1969, title = {Computerized Adaptive Testing with a Military Population}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Gorman, S.} } @article {658, title = {Description of components in tailored testing}, journal = {Behavior Research Methods and Instrumentation}, volume = {9}, year = {1977}, pages = {153-157}, author = {Patience, W. M.} } @article {481, title = {Effects of immediate knowledge of results and adaptive testing on ability test performance}, journal = {Applied Psychological Measurement}, volume = {2}, year = {1977}, pages = {259-266}, author = {Betz, N. E.} } @article {2006, title = {Effects of Immediate Knowledge of Results and Adaptive Testing on Ability Test Performance}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {259-266}, author = {Betz, N. E.} } @inbook {1974, title = {Effects of Knowledge of Results and Varying Proportion Correct on Ability Test Performance and Psychological Variables}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Prestwood, J. S.} } @inbook {1746, title = {An empirical evaluation of implied orders as a basis for tailored testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Cliff, N. A. and Cudeck, R. and McCormick, D.} } @article {2004, title = {An Empirical Investigation of the Stratified Adaptive Computerized Testing Model}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {141-152}, author = {B. K. Waters} } @inbook {1902, title = {Estimation of latent trait status in adaptive testing}, year = {1977}, note = {28 MB}, address = {D. J. Weiss (Ed.), Applications of computerized testing (Research Report 77-1). 
Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Sympson, J. B.} } @booklet {1397, title = {Flexilevel adaptive testing paradigm: Validation in technical training}, year = {1977}, address = {AFHRL Technical Report 77-35 (I)}, author = {Hansen, D. N. and Ross, S. and Harris, D. A.} } @booklet {1398, title = {Flexilevel adaptive training paradigm: Hierarchical concept structures}, year = {1977}, address = {AFHRL Technical Report 77-35 (II)}, author = {Hansen, D. N. and Ross, S. and Harris, D. A.} } @conference {951, title = {Four realizations of pyramidal adaptive testing strategies}, booktitle = {Paper presented at the Third International Symposium on Educational Testing}, year = {1977}, note = {$\#$HO77-01}, address = {University of Leiden, The Netherlands}, author = {Hornke, L. F.} } @conference {2226, title = {Group tailored tests and some problems of their utilization}, booktitle = {Third International Symposium on Educational Testing}, year = {1977}, month = {06/1977}, address = {Leyden, The Netherlands}, author = {Lewy, A. and Doron, R.} } @inbook {1976, title = {Implementation of a Model Adaptive Testing System at an Armed Forces Entrance and Examination Station}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Ree, M. J.} } @inbook {1972, title = {Implementation of Tailored Testing at the Civil Service Commission}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {McKillip, R. H.} } @booklet {1326, title = {An information comparison of conventional and adaptive tests in the measurement of classroom achievement (Research Report 77-7)}, year = {1977}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Bejar, I. I. and Weiss, D. J. and Gialluca, K. A.} } @inbook {1971, title = {A Low-Cost Terminal Usable for Computerized Adaptive Testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Lamos, J. P. and B. K. Waters} } @inbook {1901, title = {A model for testing with multidimensional items}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Sympson, J. B.} } @inbook {1981, title = {Multi-Content Adaptive Measurement of Achievement}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Weiss, D. J. and Brown, J. M} } @inbook {1980, title = {A Multivariate Model Sampling Procedure and a Method of Multidimensional Tailored Testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Urry, V. 
W.} } @inbook {1977, title = {Operational Considerations in Implementing Tailored Testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Segal, H.} } @article {675, title = {Procedures for computerized testing}, journal = {Behavior Research Methods and Instrumentation}, volume = {70}, year = {1977}, pages = {351-356}, author = {Reckase, M. D.} } @booklet {1528, title = {A rapid item search procedure for Bayesian adaptive testing (Research Report 77-4)}, year = {1977}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Vale, C. D. and Weiss, D. J.} } @conference {2223, title = {Real-data simulation of a proposal for tailored testing}, booktitle = {Third International Conference on Educational Testing}, year = {1977}, month = {06/1977}, address = {Leyden, The Netherlands}, author = {Killcross, M. C.} } @inbook {1975, title = {Reduction of Test Bias by Adaptive Testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Pine, S. M.} } @article {2003, title = {Some Properties of a Bayesian Adaptive Ability Testing Strategy}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {121-140}, author = {J. R. McBride} } @article {635, title = {Some properties of a Bayesian adaptive ability testing strategy}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {121-140}, author = {J. R. McBride} } @inbook {1813, title = {Student attitudes toward tailored testing}, year = {1977}, address = {D. J. Weiss (Ed.), Proceedings of the 1977 Computerized Adaptive Testing Conference. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Koch, W. R. and Patience, W. M.} } @article {519, title = {TAILOR: A FORTRAN procedure for interactive tailored testing}, journal = {Educational and Psychological Measurement}, volume = {37}, year = {1977}, pages = {767-769}, author = {Cudeck, R. A. and Cliff, N. A. and Kehoe, J.} } @article {638, title = {TAILOR-APL: An interactive computer program for individual tailored testing}, journal = {Educational and Psychological Measurement}, volume = {37}, year = {1977}, pages = {771-774}, author = {McCormick, D. and Cliff, N. A.} } @booklet {1522, title = {Tailored testing: A spectacular success for latent trait theory (TS 77-2)}, year = {1977}, address = {Washington DC: U. S. Civil Service Commission, Personnel Research and Development Center}, author = {Urry, V. W.} } @article {733, title = {Tailored testing: A successful application of latent trait theory}, journal = {Journal of Educational Measurement}, volume = {14}, year = {1977}, pages = {181-196}, author = {Urry, V. W.} } @article {515, title = {A theory of consistency ordering generalizable to tailored testing}, journal = {Psychometrika}, year = {1977}, pages = {375-399}, author = {Cliff, N. A.} } @booklet {1359, title = {A two-stage testing procedure (Memorandum 403-77)}, year = {1977}, address = {University of Leyden, The Netherlands, Educational Research Center}, author = {de Gruijter, D. N. 
M.} } @article {2005, title = {A Use of the Information Function in Tailored Testing}, journal = {Applied Psychological Measurement}, volume = {1}, year = {1977}, pages = {233-247}, author = {Samejima, F.} } @booklet {1459, title = {Adaptive mental testing: The state of the art (Technical Report 423)}, year = {1976}, address = {Washington DC: U.S. Army Research Institute for the Social and Behavioral Sciences.}, author = {J. R. McBride} } @article {817, title = {Adaptive testing: A Bayesian procedure for the efficient measurement of ability}, journal = {Programmed Learning and Educational Technology}, volume = {13(2)}, year = {1976}, pages = {36}, author = {Wood, R.} } @inbook {1946, title = {Adaptive testing research at Minnesota: Overview, recent results, and future directions}, year = {1976}, note = {{PDF file, 768 KB}}, address = {C. L. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 24-35). Washington DC: United States Civil Service Commission.}, author = {Weiss, D. J.} } @inbook {1839, title = {Adaptive testing research at Minnesota: Some properties of a Bayesian sequential adaptive mental testing strategy}, year = {1976}, note = {{PDF file, 960 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 36-53). Washington DC: U.S. Government Printing Office.}, author = {J. R. McBride} } @inbook {1840, title = {Bandwidth, fidelity, and adaptive tests}, year = {1976}, note = {{PDF file, 783 KB}}, address = {T. J. McConnell, Jr. (Ed.), CAT/C 2 1975: The second conference on computer-assisted test construction. Atlanta GA: Atlanta Public Schools.}, author = {J. R. McBride} } @inbook {1801, title = {Bayesian tailored testing and the influence of item bank characteristics}, year = {1976}, note = {{PDF file, 370 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 82-89). Washington DC: U.S. Government Printing Office.}, author = {Jensema, C J} } @inbook {1827, title = {A broad range tailored test of verbal ability}, year = {1976}, note = {$\#$LO75-01 {PDF file, 250 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 75-78). Washington DC: U.S. Government Printing Office.}, author = {Lord, F. M.,} } @inbook {1855, title = {Computer-assisted testing: An orderly transition from theory to practice}, year = {1976}, note = {{PDF file, 191 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 95-96). Washington DC: U.S. Government Printing Office.}, author = {McKillip, R. H. and Urry, V. W.} } @booklet {1521, title = {Computer-assisted testing with live examinees: A rendezvous with reality (TN 75-3)}, year = {1976}, address = {Washington DC: U. S. Civil Service Commission, Personnel Research and Development Center}, author = {Urry, V. W.} } @inbook {1828, title = {Discussion}, year = {1976}, note = {{PDF file, 318 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 113-117). Washington DC: U.S. Government Printing Office.}, author = {Lord, F. M.,} } @inbook {1779, title = {Discussion}, year = {1976}, note = {{PDF file, 347 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 118-119). Washington DC: U.S. Government Printing Office.}, author = {Green, B. 
F.} } @conference {1106, title = {The effect of item pool characteristics on the operation of a tailored testing procedure}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1976}, address = {Murray Hill NJ}, author = {Reckase, M. D.} } @inbook {1784, title = {Effectiveness of the ancillary estimation procedure}, year = {1976}, note = {{PDF file, 252 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 103-106). Washington DC: U.S. Government Printing Office.}, author = {Gugel, J. F. and Schmidt, F. L. and Urry, V. W.} } @booklet {1333, title = {Effects of immediate knowledge of results and adaptive testing on ability test performance (Research Report 76-3)}, year = {1976}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1352, title = {Elements of a basic test theory generalizable to tailored testing}, year = {1976}, address = {Unpublished manuscript}, author = {Cliff, N. A.} } @inbook {1943, title = {An empirical investigation of Weiss{\textquoteright} stradaptive testing model}, year = {1976}, note = {$\#$WA75-01 {PDF file, 576 KB}}, address = {C. L. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 54-63). Washington DC: U. S. Civil Service Commission.}, author = {B. K. Waters} } @mastersthesis {2239, title = {An exploratory study of the efficiency of the flexilevel testing procedure}, volume = {Doctoral}, year = {1976}, school = {University of Toronto}, address = {Toronto, Canada}, author = {Seguin, S. P.} } @inbook {1916, title = {A five-year quest: Is computerized adaptive testing feasible?}, year = {1976}, note = {{PDF file, 453 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 97-102). Washington DC: U.S. Government Printing Office.}, author = {Urry, V. W.} } @inbook {1880, title = {The graded response model of latent trait theory and tailored testing}, year = {1976}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 5-17). Washington DC: U.S. Government Printing Office.}, author = {Samejima, F.} } @article {531, title = {Hardware and software evolution of an adaptive ability measurement system}, journal = {Behavior Research Methods and Instrumentation}, volume = {8}, year = {1976}, pages = {104-107}, author = {DeWitt, L. J. and Weiss, D. J.} } @book {1656, title = {Incomplete orders and computerized testing}, year = {1976}, note = {{PDF file, 373 KB}}, address = {In C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 18-23). Washington DC: U.S. Government Printing Office.}, author = {Cliff, N. A.} } @inbook {1883, title = {Item parameterization procedures for the future}, year = {1976}, note = {{PDF file, 331 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 107-112). Washington DC: U.S. Government Printing Office.}, author = {Schmidt, F. L. and Urry, V. W.} } @booklet {1356, title = {Monte Carlo results from a computer program for tailored testing (Technical Report No. 2)}, year = {1976}, note = {$\#$CU76-02}, address = {Los Angeles CA: University of California, Department of Psychology.}, author = {Cudeck, R. A. and Cliff, N. A. and Reynolds, T. J. and McCormick, D. 
J.} } @inbook {1776, title = {Opening remarks}, year = {1976}, address = {W. H. Gorham (Chair), Computers and testing: Steps toward the inevitable conquest (PS 76-1). Symposium presented at the 83rd annual convention of the APA, Chicago IL. Washington DC: U.S. Civil Service Commission, Personnel Research and Development Center}, author = {Gorham, W. A.} } @conference {1107, title = {Procedures for computerized testing}, booktitle = {Paper presented at the sixth annual meeting of the National Conference on the Use of On-Line Computers in Psychology}, year = {1976}, note = {$\#$RE76-01}, address = {St. Louis MO}, author = {Reckase, M. D.} } @book {1655, title = {Proceedings of the first conference on computerized adaptive testing}, year = {1976}, note = {{Complete document: PDF file, 7.494 MB; Table of contents and separate papers}}, address = {Washington DC: U.S. Government Printing Office}, author = {Clark, C. K.} } @booklet {1334, title = {Psychological effects of immediate knowledge of results and adaptive ability testing (Research Report 76-4)}, year = {1976}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program, Computerized Adaptive Testing Laboratory}, author = {Betz, N. E. and Weiss, D. J.} } @inbook {1791, title = {Reflections on adaptive testing}, year = {1976}, note = {{PDF file, 464 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 90-94). Washington DC: U.S. Government Printing Office.}, author = {Hansen, D. N.} } @booklet {1460, title = {Research on adaptive testing 1973-1976: A review of the literature}, year = {1976}, address = {Unpublished manuscript, University of Minnesota}, author = {J. R. McBride} } @booklet {1419, title = {A review of research in tailored testing (Report APRE No. 9/76)}, year = {1976}, address = {Farnborough, Hants, U. K.: Ministry of Defence, Army Personnel Research Establishment}, author = {Killcross, M. C.} } @book {1683, title = {Simulation studies of adaptive testing: A comparative evaluation}, year = {1976}, address = {Unpublished doctoral dissertation, University of Minnesota, Minneapolis, MN}, author = {J. R. McBride} } @inbook {1829, title = {Some likelihood functions found in tailored testing}, year = {1976}, note = {{PDF file, 166 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 79-81). Washington DC: U.S. Government Printing Office.}, author = {Lord, F. M.,} } @booklet {1472, title = {Some properties of a Bayesian adaptive ability testing strategy (Research Report 76-1)}, year = {1976}, address = {Minneapolis MN: Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {J. R. McBride and Weiss, D. J.} } @booklet {1450, title = {Test theory and the public interest}, year = {1976}, address = {Proceedings of the Educational Testing Service Invitational Conference}, author = {Lord, F. M.,} } @inbook {1748, title = {Using computerized tests to add new dimensions to the measurement of abilities which are important for on-job performance: An exploratory study}, year = {1976}, note = {{PDF file, 632 KB}}, address = {C. K. Clark (Ed.), Proceedings of the First Conference on Computerized Adaptive Testing (pp. 64-74). Washington DC: U.S. Government Printing Office.}, author = {Cory, C. 
H.} } @booklet {1351, title = {A basic test theory generalizable to tailored testing (Technical Report No 1)}, year = {1975}, address = {Los Angeles CA: University of Southern California, Department of Psychology.}, author = {Cliff, N. A.} } @article {656, title = {A Bayesian sequential procedure for quantal response in the context of adaptive mental testing}, journal = {Journal of the American Statistical Association}, volume = {70}, year = {1975}, pages = {351-356}, author = {Owen, R. J.} } @conference {1135, title = {Behavior of the maximum likelihood estimate in a simulated tailored testing situation}, booktitle = {Paper presented at the annual meeting of the Psychometric Society}, year = {1975}, note = {{PDF file, 698 KB}}, address = {Iowa City}, author = {Samejima, F.} } @booklet {1581, title = {Best test design and self-tailored testing (Research Memorandum No 19)}, year = {1975}, address = {Chicago: University of Chicago, Department of Education, Statistical Laboratory.}, author = {Wright, B. D. and Douglas, G. A.} } @booklet {1449, title = {A broad range test of verbal ability (RB-75-5)}, year = {1975}, address = {Princeton NJ: Educational Testing Service}, author = {Lord, F. M.,} } @article {514, title = {Complete orders from incomplete data: Interactive ordering and tailored testing}, journal = {Psychological Bulletin}, volume = {82}, year = {1975}, pages = {259-302}, author = {Cliff, N. A.} } @article {799, title = {Computerized adaptive ability measurement}, journal = {Naval Research Reviews}, volume = {28}, year = {1975}, pages = {1-18}, author = {Weiss, D. J.} } @inbook {1945, title = {Computerized adaptive trait measurement: Problems and prospects (Research Report 75-5)}, year = {1975}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Weiss, D. J.} } @inbook {1824, title = {Discussion}, year = {1975}, note = {{PDF file, 414 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 44-46. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Linn, R. L.} } @inbook {1739, title = {Discussion}, year = {1975}, note = {{PDF file, 414 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 46-49. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Bock, R. D.} } @conference {1105, title = {The effect of item choice on ability estimation when using a simple logistic tailored testing model}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association}, year = {1975}, address = {Washington, D.C.}, author = {Reckase, M. D.} } @booklet {1332, title = {Empirical and simulation studies of flexilevel ability testing (Research Report 75-3)}, year = {1975}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1430, title = {An empirical comparison of two-stage and pyramidal ability testing (Research Report 75-1)}, year = {1975}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Larkin, K. C. and Weiss, D. J.} } @inbook {1900, title = {Evaluating the results of computerized adaptive testing}, year = {1975}, note = {{PDF file, 446 KB}}, address = {D. J. 
Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 26-31. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Sympson, J. B.} } @inbook {1736, title = {New types of information and psychological implications}, year = {1975}, note = {{PDF file, 609 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 32-43. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Betz, N. E.} } @inbook {1838, title = {Scoring adaptive tests}, year = {1975}, note = {{PDF file, 442 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 17-25. Minneapolis MN: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {J. R. McBride} } @article {724, title = {Sequential testing for instructional classification}, journal = {Journal of Computer-Based Instruction}, volume = {1}, year = {1975}, pages = {92-99}, author = {Thomas, D. B.} } @booklet {1527, title = {A simulation study of stradaptive ability testing (Research Report 75-6)}, year = {1975}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Vale, C. D. and Weiss, D. J.} } @inbook {1917, title = {Strategies of branching through an item pool}, year = {1975}, note = {$\#$VA75-01 {PDF file, 600 KB}}, address = {D. J. Weiss (Ed.), Computerized adaptive trait measurement: Problems and Prospects (Research Report 75-5), pp. 1-16. Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program.}, author = {Vale, C. D.} } @booklet {1526, title = {A study of computer-administered stradaptive ability testing (Research Report 75-4)}, year = {1975}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Vale, C. D. and Weiss, D. J.} } @conference {886, title = {Tailored testing: Maximizing validity and utility for job selection}, booktitle = {Paper presented at the 86th Annual Convention of the American Psychological Association. Toronto}, year = {1975}, address = {Canada}, author = {Croll, P. R. and Urry, V. W.} } @article {580, title = {An application of latent trait mental test theory}, journal = {British Journal of Mathematical and Statistical Psychology}, volume = {27}, year = {1974}, note = {$\#$JE74029}, pages = {29-48}, author = {Jensema, C J} } @conference {1104, title = {An application of the Rasch simple logistic model to tailored testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association.}, year = {1974}, address = {St. Louis MO}, author = {Reckase, M. D.} } @conference {2222, title = {A Bayesian approach in sequential testing}, booktitle = {American Educational Research Association}, year = {1974}, month = {04/1974}, address = {Chicago IL}, author = {Hsu, T. and Pingel, K.} } @book {1677, title = {The comparison of two tailored testing models and the effects of the models variables on actual loss}, year = {1974}, address = {Unpublished doctoral dissertation, Florida State University}, author = {Kalisch, S. 
J.} } @booklet {1361, title = {A computer software system for adaptive ability measurement (Research Report 74-1)}, year = {1974}, address = {Minneapolis MN: University of Minnesota, Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {De Witt, J. J. and Weiss, D. J.} } @booklet {1520, title = {Computer-assisted testing: The calibration and evaluation of the verbal ability bank (Technical Study 74-3)}, year = {1974}, address = {Washington DC: U. S. Civil Service Commission, Personnel Research and Development Center}, author = {Urry, V. W.} } @booklet {1396, title = {Computer-based adaptive testing models for the Air Force technical training environment: Phase I: Development of a computerized measurement system for Air Force technical training}, year = {1974}, address = {JSAS Catalogue of Selected Documents in Psychology, 5, 1-86 (MS No. 882). AFHRL Technical Report 74-48.}, author = {Hansen, D. N. and Johnson, B. F. and Fagan, R. L. and Tan, P. and Dick, W.} } @booklet {1318, title = {Development of a programmed testing system (Technical Paper 259)}, year = {1974}, address = {Arlington VA: US Army Research Institute for the Behavioral and Social Sciences. (NTIS No. AD A001534)}, author = {Bayroff, A. G. and Ross, R. M. and Fischl, M. A.} } @booklet {1429, title = {An empirical investigation of computer-administered pyramidal ability testing (Research Report 74-3)}, year = {1974}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Larkin, K. C. and Weiss, D. J.} } @conference {2225, title = {An empirical investigation of the stability and accuracy of flexilevel tests}, booktitle = {Annual meeting of the National Council on Measurement in Education}, year = {1974}, month = {03/1974}, address = {Chicago IL}, author = {Kocher, A. T.} } @book {1717, title = {An empirical investigation of the stratified adaptive computerized testing model for the measurement of human ability}, year = {1974}, note = {$\#$WA74-01}, address = {Unpublished Ph.D. dissertation, Florida State University}, author = {B. K. Waters} } @book {1687, title = {An evaluation of the self-scoring flexilevel testing model}, year = {1974}, address = {Unpublished dissertation, Florida State University. Dissertation Abstracts International, 35 (7-A), 4257}, author = {Olivier, P.} } @mastersthesis {2228, title = {An evaluation of the self-scoring flexilevel testing model}, year = {1974}, school = {Florida State University}, type = {Ph.D. Dissertation}, author = {Olivier, P.} } @inbook {1826, title = {Individualized testing and item characteristic curve theory}, year = {1974}, address = {D. H. Krantz, R. C. Atkinson, R. D. Luce, and P. Suppes (Eds.), Contemporary developments in mathematical psychology (Vol. II). San Francisco: Freeman.}, author = {Lord, F. M.,} } @booklet {1448, title = {Practical methods for redesigning a homogeneous test, also for designing a multilevel test (RB-74-30)}, year = {1974}, address = {Princeton NJ: Educational Testing Service}, author = {Lord, F. 
M.,} } @inbook {1853, title = {Recent and projected developments in ability testing by computer}, year = {1974}, address = {Earl Jones (Ed.), Symposium Proceedings: Occupational Research and the Navy{\textendash}Prospectus 1980 (TR-74-14). San Diego, CA: Navy Personnel Research and Development Center.}, author = {J. R. McBride and Weiss, D. J.} } @booklet {1331, title = {Simulation studies of two-stage ability testing (Research Report 74-4)}, year = {1974}, note = {{PDF file, 2.92 MB}}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1567, title = {Strategies of adaptive ability measurement (Research Report 74-5)}, year = {1974}, note = {{PDF file, 5.555 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J.} } @article {587, title = {A tailored testing model employing the beta distribution and conditional difficulties}, journal = {Journal of Computer-Based Instruction}, volume = {1}, year = {1974}, pages = {22-28}, author = {Kalisch, S. J.} } @booklet {1416, title = {A tailored testing model employing the beta distribution (unpublished manuscript)}, year = {1974}, address = {Florida State University, Educational Evaluation and Research Design Program}, author = {Kalisch, S. J.} } @conference {964, title = {A tailored testing system for selection and allocation in the British Army}, booktitle = {Paper presented at the 18th International Congress of Applied Psychology}, year = {1974}, address = {Montreal Canada}, author = {Killcross, M. C.} } @article {563, title = {Testing and decision-making procedures for selected individualized instruction programs}, journal = {Review of Educational Research}, volume = {10}, year = {1974}, pages = {371-400}, author = {Hambleton, R. K.} } @article {581, title = {The validity of Bayesian tailored testing}, journal = {Educational and Psychological Measurement}, volume = {34}, year = {1974}, pages = {757-756}, author = {Jensema, C J} } @booklet {1471, title = {A word knowledge item pool for adaptive ability measurement (Research Report 74-2)}, year = {1974}, address = {Minneapolis MN: Department of Psychology, Computerized Adaptive Testing Laboratory}, author = {J. R. McBride and Weiss, D. J.} } @booklet {1574, title = {Ability measurement: Conventional or adaptive? (Research Report 73-1)}, year = {1973}, note = {{PDF file, 4.98 MB}.}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J. and Betz, N. E.} } @inbook {1803, title = {Computer-based psychological testing}, year = {1973}, address = {A. Elithorn and D. Jones (Eds.), Artificial and human thinking (pp. 83-93). San Francisco CA: Jossey-Bass.}, author = {Jones, D. and Weinman, J.} } @booklet {1330, title = {An empirical study of computer-administered two-stage ability testing (Research Report 73-4)}, year = {1973}, address = {Minneapolis: Department of Psychology, Psychometric Methods Program}, author = {Betz, N. E. and Weiss, D. J.} } @booklet {1379, title = {Implementation of a Bayesian system for decision analysis in a program of individually prescribed instruction (Research Report No 60)}, year = {1973}, note = {$\#$FE73-01}, address = {Iowa City IA: American College Testing Program}, author = {Ferguson, R. L. and Novick, M. 
R.} } @conference {1103, title = {An interactive computer program for tailored testing based on the one-parameter logistic model}, booktitle = {Paper presented at the National Conference on the Use of On-Line Computers in Psychology}, year = {1973}, address = {St. Louis MO}, author = {Reckase, M. D.} } @book {1700, title = {A multivariate experimental study of three computerized adaptive testing models for the measurement of attitude toward teaching effectiveness}, year = {1973}, address = {Unpublished doctoral dissertation, Florida State University}, author = {Tam, P. T.-K.} } @booklet {1488, title = {An overview of tailored testing (unpublished manuscript)}, year = {1973}, address = {Florida State University, Program of Educational Evaluation and Research Design}, author = {Olivier, P.} } @conference {2224, title = {The potential use of tailored testing for allocation to army employments}, booktitle = {NATO Conference on Utilisation of Human Resources}, year = {1973}, month = {06/1973}, address = {Lisbon, Portugal}, author = {Killcross, M. C. and Cassie, A.} } @article {816, title = {Response-contingent testing}, journal = {Review of Educational Research}, volume = {43}, year = {1973}, pages = {529-544}, author = {Wood, R. L.} } @booklet {1392, title = {A review of testing and decision-making procedures (Technical Bulletin No. 15)}, year = {1973}, address = {Iowa City IA: American College Testing Program.}, author = {Hambleton, R. K.} } @booklet {1566, title = {The stratified adaptive computerized ability test (Research Report 73-3)}, year = {1973}, note = {{PDF file, 2.498 MB}}, address = {Minneapolis: University of Minnesota, Department of Psychology, Psychometric Methods Program}, author = {Weiss, D. J.} } @article {586, title = {A tailored testing model employing the beta distribution and conditional difficulties}, journal = {Journal of Computer-Based Instruction}, volume = {1}, year = {1973}, pages = {111-120}, author = {Kalisch, S. J.} } @booklet {1412, title = {An application of latent trait mental test theory to the Washington Pre-College Testing Battery}, year = {1972}, note = {$\#$JE72-01}, address = {Unpublished doctoral dissertation, University of Washington}, author = {Jensema, C J} } @booklet {1580, title = {Fully adaptive sequential testing: A Bayesian procedure for efficient ability measurement}, year = {1972}, address = {Unpublished manuscript, University of Chicago}, author = {Wood, R. L.} } @article {538, title = {Individual intelligence testing without the examiner: reliability of an automated method}, journal = {Journal of Consulting and Clinical Psychology}, volume = {38}, year = {1972}, pages = {9-14}, author = {Elwood, D. L. and Griffin, H.R.} } @booklet {1447, title = {Individualized testing and item characteristic curve theory (RB-72-50)}, year = {1972}, address = {Princeton NJ: Educational Testing Service}, author = {Lord, F. M.,} } @book {1685, title = {A modification to Lord{\textquoteright}s model for tailored tests}, year = {1972}, address = {Unpublished doctoral dissertation, University of Toronto}, author = {Mussio, J. J.} } @article {256, title = {Sequential testing for dichotomous decisions}, journal = {Educational and Psychological Measurement}, volume = {32}, number = {1}, year = {1972}, pages = {85-95}, keywords = {CCAT, classification, computerized adaptive testing, sequential probability ratio testing, SPRT}, author = {Linn, R. L. and Rock, D. A. and Cleary, T. 
A.} } @booklet {1378, title = {The application of item generators for individualizing mathematics testing and instruction (Report 1971/14)}, year = {1971}, address = {Pittsburgh PA: University of Pittsburgh Learning Research and Development Center}, author = {Ferguson, R. L. and Hsu, T.} } @article {795, title = {A comparison of computer-simulated conventional and branching tests}, journal = {Educational and Psychological Measurement}, volume = {31}, year = {1971}, pages = {125-136}, author = {Waters, C. J. and Bayroff, A. G.} } @booklet {1342, title = {A comparison of four methods of selecting items for computer-assisted testing (Technical Bulletin STB 72-5)}, year = {1971}, address = {San Diego: Naval Personnel and Training Research Laboratory}, author = {Bryson, R.} } @booklet {1377, title = {Computer assistance for individualizing measurement}, year = {1971}, address = {Pittsburgh PA: University of Pittsburgh R and D Center}, author = {Ferguson, R. L.} } @book {1723, title = {Computerized adaptive sequential testing}, year = {1971}, address = {Unpublished doctoral dissertation, University of Chicago}, author = {Wood, R. L.} } @booklet {1519, title = {Individualized testing by Bayesian estimation}, year = {1971}, address = {Seattle: University of Washington, Bureau of Testing Project 0171-177}, author = {Urry, V. W.} } @article {544, title = {A model for computer-assisted criterion-referenced measurement}, journal = {Education}, volume = {81}, year = {1971}, pages = {25-31}, author = {Ferguson, R. L.} } @article {620, title = {Robbins-Monro procedures for tailored testing}, journal = {Educational and Psychological Measurement}, volume = {31}, year = {1971}, pages = {3-31}, author = {Lord, F. M.,} } @article {621, title = {The self-scoring flexilevel test}, journal = {Journal of Educational Measurement}, volume = {8}, year = {1971}, pages = {147-151}, author = {Lord, F. M.,} } @booklet {1446, title = {Tailored testing: An application of stochastic approximation (RM 71-2)}, year = {1971}, address = {Princeton NJ: Educational Testing Service}, author = {Lord, F. M.,} } @article {622, title = {Tailored testing, an application of stochastic approximation}, journal = {Journal of the American Statistical Association}, volume = {66}, year = {1971}, pages = {707-711}, author = {Lord, F. M.,} } @article {618, title = {A theoretical study of the measurement effectiveness of flexilevel tests}, journal = {Educational and Psychological Measurement}, volume = {31}, year = {1971}, pages = {805-813}, author = {Lord, F. M.,} } @article {619, title = {A theoretical study of two-stage testing}, journal = {Psychometrika}, volume = {36}, year = {1971}, pages = {227-242}, author = {Lord, F. M.,} } @article {792, title = {Adaptive testing of cognitive skills}, journal = {Proceedings of the Annual Convention of the American Psychological Association}, volume = {5 (part 1)}, year = {1970}, pages = {167-168}, author = {Wargo, M. J.} } @inbook {1778, title = {Comments on tailored testing}, year = {1970}, address = {W. H. Holtzman (Ed.), Computer-assisted instruction, testing, and guidance (pp. 184-197). New York: Harper and Row.}, author = {Green, B. F.} } @booklet {1376, title = {Computer assistance for individualizing measurement}, year = {1970}, address = {Pittsburgh PA: University of Pittsburgh, Learning Research and Development Center}, author = {Ferguson, R. 
L.} } @article {543, title = {Computer assistance for individualizing measurement}, journal = {Computers and Automation}, volume = {March 1970}, year = {1970}, pages = {19}, author = {Ferguson, R. L.} } @inbook {1793, title = {Individually tailored testing: Discussion}, year = {1970}, note = {$\#$HO70198}, address = {W. H. Holtzman (Ed.), Computer-assisted instruction, testing, and guidance (pp. 198-200). New York: Harper and Row.}, author = {Holtzman, W. H.} } @conference {923, title = {A model for computer-assisted criterion-referenced measurement}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association/National Council on Measurement in Education}, year = {1970}, address = {Minneapolis MN}, author = {Ferguson, R. L.} } @booklet {1445, title = {The self-scoring flexilevel test (RB-70-43)}, year = {1970}, address = {Princeton NJ: Educational Testing Service}, author = {Lord, F. M.,} } @booklet {1444, title = {Sequential testing for dichotomous decisions. College Entrance Examination Board Research and Development Report (RDR 69-70, No. 3, and Educational Testing Service RB-70-31)}, year = {1970}, note = {$\#$LI70-31}, address = {Princeton NJ: Educational Testing Service.}, author = {Linn, R. L. and Rock, D. A. and Cleary, T. A.} } @inbook {1825, title = {Some test theory for tailored testing}, year = {1970}, note = {$\#$LO70139}, address = {W. H. Holtzman (Ed.), Computer-assisted instruction, testing, and guidance (pp. 139-183). New York: Harper and Row.}, author = {Lord, F. M.,} } @article {537, title = {Automation of psychological testing}, journal = {American Psychologist}, volume = {24}, year = {1969}, pages = {287-289}, author = {Elwood, D. L.} } @booklet {1490, title = {A Bayesian approach to tailored testing (Research Report 69-92)}, year = {1969}, address = {Princeton NJ: Educational Testing Service}, author = {Owen, R. J.} } @booklet {1485, title = {Bayesian methods in psychological testing (Research Bulletin RB-69-31)}, year = {1969}, address = {Princeton NJ: Educational Testing Service}, author = {Novick, M. R.} } @booklet {1375, title = {Computer-assisted criterion-referenced measurement (Working Paper No 49)}, year = {1969}, address = {Pittsburgh PA: University of Pittsburgh, Learning and Research Development Center. (ERIC No. ED 037 089)}, author = {Ferguson, R. L.} } @article {617, title = {The development and evaluation of several programmed testing methods}, journal = {Educational and Psychological Measurement}, volume = {29}, year = {1969}, pages = {129-146}, author = {Linn, R. L. and Cleary, T. A.} } @book {1668, title = {The development, implementation, and evaluation of a computer-assisted branched test for a program of individually prescribed instruction}, year = {1969}, address = {Doctoral dissertation, University of Pittsburgh. Dissertation Abstracts International, 30-09A, 3856. (University Microfilms No. 70-4530).}, author = {Ferguson, R. L.} } @article {815, title = {The efficacy of tailored testing}, journal = {Educational Research}, volume = {11}, year = {1969}, pages = {219-222}, author = {Wood, R. L.} } @article {513, title = {An exploratory study of programmed tests}, journal = {Educational and Psychological Measurement}, volume = {28}, year = {1969}, pages = {345-360}, author = {Cleary, T. A. and Linn, R. L. and Rock, D. 
A.} } @conference {1254, title = {Individualized assessment of differential abilities}, booktitle = {Paper presented at the 77th annual meeting of the American Psychological Association.}, year = {1969}, author = {Weiss, D. J.} } @inbook {1790, title = {An investigation of computer-based science testing}, year = {1969}, address = {R. C. Atkinson and H. A. Wilson (Eds.), Computer-assisted instruction: A book of readings. New York: Academic Press.}, author = {Hansen, D. N.} } @conference {839, title = {Psychometric problems with branching tests}, booktitle = {Paper presented at the annual meeting of the American Psychological Association.}, year = {1969}, note = {$\#$BA69-01}, author = {Bayroff, A. G.} } @booklet {1609, title = {Short tailored tests (RB-69-63)}, number = {RB-69-63}, year = {1969}, address = {Princeton NJ: Educational Testing Service}, author = {Stocking, M. L.} } @article {589, title = {Use of an on-line computer for psychological testing with the up-and-down method}, journal = {American Psychologist}, volume = {24}, year = {1969}, pages = {207-211}, author = {Kappauf, W. E.} } @book {1674, title = {Computer-assisted testing (Eds.)}, year = {1968}, address = {Princeton NJ: Educational Testing Service}, author = {Harman, H. H. and Helm, C. E. and Loye, D. E.} } @booklet {1443, title = {The development and evaluation of several programmed testing methods (Research Bulletin 68-5)}, year = {1968}, note = {$\#$LI68-05}, address = {Princeton NJ: Educational Testing Service}, author = {Linn, R. L. and Rock, D. A. and Cleary, T. A.} } @booklet {1395, title = {An investigation of computer-based science testing}, year = {1968}, note = {$\#$HA68-01 (See published version.)}, address = {Tallahassee FL: Florida State University}, author = {Hansen, D. N. and Schwarz, G.} } @booklet {1399, title = {An investigation of computer-based science testing}, year = {1968}, address = {Tallahassee: Institute of Human Learning, Florida State University}, author = {Hansen, D. N. and Schwarz, G.} } @article {661, title = {Methodological determination of the PEST (parameter estimation by sequential testing) procedure}, journal = {Perception and Psychophysics}, volume = {3}, year = {1968}, pages = {285-289}, author = {Pollack, I.} } @article {512, title = {Reproduction of total test score through the use of sequential programmed tests}, journal = {Journal of Educational Measurement}, volume = {5}, year = {1968}, pages = {183-187}, author = {Cleary, T. A. and Linn, R. L. and Rock, D. A.} } @booklet {1319, title = {An exploratory study of branching tests (Technical Research Note 188)}, year = {1967}, address = {Washington DC: US Army Behavioral Science Research Laboratory. (NTIS No. AD 655263)}, author = {Bayroff, A. G. and Seeley, L. C.} } @inbook {1750, title = {New light on test strategy from decision theory}, year = {1966}, address = {A. Anastasi (Ed.), Testing problems in perspective. Washington DC: American Council on Education.}, author = {Cronbach, L. J.} } @inbook {1796, title = {Programmed testing in the examinations of the National Board of Medical Examiners}, year = {1966}, address = {A. Anastasi (Ed.), Testing problems in perspective. Washington DC: American Council on Education.}, author = {Hubbard, J. P.} } @article {562, title = {Adaptive testing in an older population}, journal = {Journal of Psychology}, volume = {60}, year = {1965}, pages = {193-198}, author = {Greenwood, D. I.
and Taylor, C.} } @booklet {1317, title = {Feasibility of a programmed testing machine}, year = {1964}, address = {US Army Personnel Research Office Research Study 64-3.}, author = {Bayroff, A. G.} } @booklet {1562, title = {Preliminary evaluation of simulated branching tests}, year = {1964}, address = {U.S. Army Personnel Research Office Technical Research Note 140.}, author = {Waters, C. J.} } @book {1690, title = {An evaluation of the sequential method of testing}, year = {1962}, note = {$\#$PA62-1 University Microfilms Number 63-1748.}, address = {Unpublished doctoral dissertation, Michigan State University}, author = {Paterson, J. J.} } @booklet {1599, title = {Exploratory study of a sequential item test}, year = {1962}, address = {U.S. Army Personnel Research Office, Technical Research Note 129.}, author = {Seeley, L. C. and Morton, M. A. and Anderson, A. A.} } @book {1696, title = {An analysis of the application of utility theory to the development of two-stage testing models}, year = {1961}, address = {Unpublished doctoral dissertation, University of Buffalo}, author = {Rosenbach, J. H.} } @booklet {1320, title = {Construction of an experimental sequential item test (Research Memorandum 60-1)}, year = {1960}, address = {Washington DC: Personnel Research Branch, Department of the Army}, author = {Bayroff, A. G. and Thomas, J. J. and Anderson, A. A.} } @booklet {1426, title = {Progress report on the sequential item test}, year = {1959}, address = {East Lansing MI: Michigan State University, Bureau of Educational Research}, author = {Krathwohl, D.} } @booklet {1310, title = {The multi-level experiment: A study of a two-level test system for the College Board Scholastic Aptitude Test}, number = {Statistical Report 58-21}, year = {1958}, address = {Princeton NJ: Educational Testing Service}, author = {Angoff, W. H. and Huddleston, E. M.} } @article {600, title = {The sequential item test}, journal = {American Psychologist}, volume = {2}, year = {1956}, pages = {419}, author = {Krathwohl, D. R. and Huyser, R. J.} } @article {437, title = {An empirical study of the applicability of sequential analysis to item selection}, volume = {13}, year = {1953}, pages = {3-13}, author = {Anastasi, A.} } @article {439, title = {Sequential analysis with more than two alternative hypotheses, and its relation to discriminant function analysis}, volume = {12}, year = {1950}, pages = {137-144}, author = {Armitage, P.} } @article {647, title = {Some empirical aspects of the sequential analysis technique as applied to an achievement examination}, journal = {Journal of Experimental Education}, volume = {18}, year = {1950}, pages = {195-207}, author = {Moonan, W. J.} } @article {577, title = {A clinical study of consecutive and adaptive testing with the revised Stanford-Binet}, journal = {Journal of Consulting Psychology}, volume = {11}, year = {1947}, pages = {93-103}, author = {Hutt, M. L.} } @article {518, title = {An application of sequential sampling to testing students}, journal = {Journal of the American Statistical Association}, volume = {41}, year = {1946}, pages = {547-556}, author = {Cowden, D. J.} } @book {1649, title = {A method of measuring the development of the intelligence of young children}, year = {1915}, address = {Chicago: Chicago Medical Book Co}, author = {Binet, A. and Simon, T.} } @article {484, title = {Le d{\'e}veloppement de l{\textquoteright}intelligence chez les enfants}, journal = {L{\textquoteright}Ann{\'e}e Psychologique}, volume = {14}, year = {1908}, note = {In French}, pages = {1-94}, author = {Binet, A.
and Simon, T.} } @article {483, title = {M{\'e}thode nouvelle pour le diagnostic du niveau intellectuel des anormaux}, journal = {L{\textquoteright}Ann{\'e}e Psychologique}, volume = {11}, year = {1905}, note = {(also cited as: Applications des m{\'e}thodes nouvelles au diagnostic du niveau intellectuel chez des enfants normaux et anormaux d{\textquoteright}hospice et d{\textquoteright}{\'e}cole primaire, 245-336.) In French}, pages = {191-244}, author = {Binet, A. and Simon, T.} } @article {2746, title = {Microcomputer network for computerized adaptive testing (CAT)}, journal = {NPRDC-TR-84-33}, publisher = {San Diego: Navy Personnel Research and Development Center.}, url = {https://apps.dtic.mil/dtic/tr/fulltext/u2/a140256.pdf}, author = {Quan, B. and Park, T. A. and Sandahl, G. and Wolfe, J. H.} }