@article {2717, title = {Time-Efficient Adaptive Measurement of Change}, journal = {Journal of Computerized Adaptive Testing}, volume = {7}, year = {2019}, pages = {15-34}, abstract = {
The adaptive measurement of change (AMC) refers to the use of computerized adaptive testing (CAT) at multiple occasions to efficiently assess a respondent{\textquoteright}s improvement, decline, or sameness from occasion to occasion. Whereas previous AMC research focused on administering the most informative item to a respondent at each stage of testing, the current research proposes the use of Fisher information per time unit as an item selection procedure for AMC. The latter procedure incorporates not only the amount of information provided by a given item but also the expected amount of time required to complete it. In a simulation study, the use of Fisher information per time unit item selection resulted in a lower false positive rate in the majority of conditions studied, and a higher true positive rate in all conditions studied, compared to item selection via Fisher information without accounting for the expected time taken. Future directions of research are suggested.
}, keywords = {adaptive measurement of change, computerized adaptive testing, Fisher information, item selection, response-time modeling}, issn = {2165-6592}, doi = {10.7333/1909-0702015}, url = {http://iacat.org/jcat/index.php/jcat/article/view/73/35}, author = {Matthew Finkelman and Chun Wang} } @conference {2651, title = {Adaptive Item and Feedback Selection in Personalized Learning with a Network Approach}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {Personalized learning is a term used to describe educational systems that adapt student-specific curriculum sequencing, pacing, and presentation based on their unique backgrounds, knowledge, preferences, interests, and learning goals. (Chen, 2008; Netcoh, 2016). The technological approach to personalized learning provides data-driven models to incorporate these adaptations automatically. Examples of applications include online learning systems, educational games, and revision-aid systems. In this study we introduce Bayesian networks as a methodology to implement an adaptive framework within a personalized learning environment. Existing ideas from Computerized Adaptive Testing (CAT) with Item Response Theory (IRT), where choices about content provision are based on maximizing information, are related to the goals of personalized learning environments. Personalized learning entails other goals besides efficient ability estimation by maximizing information, such as an adaptive configuration of preferences and feedback to the student. These considerations will be discussed and their application in networks will be illustrated.
Adaptivity in Personalized Learning. In standard CATs there is a focus on selecting items that provide maximum information about the ability of an individual at a certain point in time (Van der Linden \& Glas, 2000). When learning is the main goal of testing, alternative adaptive item selection methods were explored by Eggen (2012). The adaptive choices made in personalized learning applications require additional adaptivity with respect to the following aspects: the moment of feedback, the kind of feedback, and the possibility for students to actively influence the learning process.
Bayesian Networks and Personalized Learning. Personalized learning aims at constructing a framework to incorporate all the aspects mentioned above. Therefore, the goal of this framework is not only to focus on retrieving ability estimates by choosing items on maximum information, but also to construct a framework that allows for these other factors to play a role. Plajner and Vomlel (2016) have already applied Bayesian Networks to adaptive testing, selecting items with help of entropy reduction. Almond et al. (2015) provide a reference work on Bayesian Networks in Educational Assessment. Both acknowledge the potential of the method in terms of features such as modularity options to build finer-grained models. IRT does not allow to model sub-skills very easily and to gather information on fine-grained level, due to its dependency on the assumption of generally one underlying trait. The local independence assumption in IRT implies being interested in mainly the student{\textquoteright}s overall ability on the subject of interest. When the goal is to improve students{\textquoteright} learning, we are not just interested in efficiently coming to their test score on a global subject. One wants a model that is able to map educational problems and talents in detail over the whole educational program, while allowing for dependency between items. The moment in time can influence topics to be better mastered than others, and this is exactly what we want to get out of a model. The possibility to model flexible structures, estimate abilities on a very detailed level for sub-skills and to easily incorporate other variables such as feedback in Bayesian Networks makes it a very promising method for making adaptive choices in personalized learning. It is shown in this research how item and feedback selection can be performed with help of the promising Bayesian Networks. A student involvement possibility is also introduced and evaluated.
References
Almond, R. G., Mislevy, R. J., Steinberg, L. S., Yan, D., \& Williamson, D. M. (2015). Bayesian Networks in Educational Assessment. Test. New York: Springer Science+Business Media. http://doi.org/10.1007/978-0-387-98138-3
Eggen, T.J.H.M. (2012). Computerized Adaptive Testing Item Selection in Computerized Adaptive Learning Systems. In: Eggen, T.J.H.M., \& Veldkamp, B.P. (Eds.). Psychometrics in Practice at RCEC. Enschede: RCEC.
Netcoh, S. (2016, March). {\textquotedblleft}What Do You Mean by {\textquoteleft}Personalized Learning?{\textquoteright}{\textquotedblright} Crosscutting Conversations in Education {\textendash} Research, Reflections \& Practice. Blogpost.
Plajner, M., \& Vomlel, J. (2016). Student Skill Models in Adaptive Testing. In Proceedings of the Eighth International Conference on Probabilistic Graphical Models (pp. 403-414).
Van der Linden, W. J., \& Glas, C. A. (2000). Computerized adaptive testing: Theory and practice. Dordrecht: Kluwer Academic Publishers.
}, keywords = {feedback selection, item selection, network approach, personalized learning}, author = {Nikky van Buuren and Hendrik Straat and Theo Eggen and Jean-Paul Fox} } @conference {2649, title = {Considerations in Performance Evaluations of Computerized Formative Assessments}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {Computerized adaptive instruments have been widely established and used in the context of summative assessments for purposes including licensure, admissions and proficiency testing. The benefits of examinee tailored examinations, which can provide estimates of performance that are more reliable and valid, have in recent years attracted a greater audience (i.e. patient oriented outcomes, test prep, etc.). Formative assessment, which are most widely understood in their implementation as diagnostic tools, have recently started to expand to lesser known areas of computerized testing such as in implementations of instructional designs aiming to maximize examinee learning through targeted practice.
Using a CAT instrument within the framework of evaluating repetitious examinee performances (in such settings as Quiz Bank practices, for example) poses unique challenges not germane to summative assessments. The scale on which item parameters (and subsequently examinee performance estimates such as Maximum Likelihood Estimates) are determined usually does not take change over time into consideration. While vertical scaling features resolve the learning acquisition problem, most content practice engines do not make use of explicit practice windows which could be vertically aligned. Alternatively, the Multidimensional (MIRT)- and Hierarchical Item Response Theory (HIRT) models allow for the specification of random effects associated with change over time in examinees{\textquoteright} skills, but are often complex and require content and usage resources not often observed.
The research submitted for consideration simulated examinees{\textquoteright} repeated variable length Quiz Bank practice in algebra using a 500 1-PL operational item pool. The stability simulations sought to determine with which rolling item interval size ability estimates would provide the most informative insight into the examinees{\textquoteright} learning progression over time. Estimates were evaluated in terms of reduction in estimate uncertainty, bias and RMSD with the true and total item based ability estimates. It was found that rolling item intervals between 20-25 items provided the best reduction of uncertainty around the estimate without compromising the ability to provide informed performance estimates to students. However, while asymptotically intervals of 20-25 items tended to provide adequate estimates of performance, changes over shorter periods of time assessed with shorter quizzes could not be detected as those changes would be suppressed in lieu of the performance based on the full interval considered. Implications for infrastructure (such as recommendation engines, etc.), product and scale development are discussed.
}, keywords = {algebra, Formative Assessment, Performance Evaluations}, author = {Michael Chajewski and John Harnisher} } @conference {2669, title = {FastCAT {\textendash} Customizing CAT Administration Rules to Increase Response Efficiency}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {A typical pre-requisite for CAT administration is the existence of an underlying item bank completely covering the range of the trait being measured. When a bank fails to cover the full range of the trait, examinees who are close to the floor or ceiling will often never achieve a standard error cut-off and examinees will be forced to answer items increasingly less relevant to their trait level. This scenario is fairly typical for many patients responding to patient reported outcome measures (PROMS). For example, in the assessment of physical functioning, many item banks ceiling at about the 50\%ile. For most healthy patients, after a few items the only items remaining in the bank will represent decreasing ability (even though the patient has already indicated that they are at or above the mean for the population). Another example would be for a patient with no pain taking a Pain CAT. They will probably answer {\textquotedblleft}Never{\textquotedblright} pain for every succeeding item out to the maximum test length. For this project we sought to reduce patient burden, while maintaining test accuracy, through the reduction of CAT length using novel stopping rules.
We studied CAT administration assessment histories for patients who were administered Patient Reported Outcomes Measurement Information System (PROMIS) CATs. In the PROMIS 1 Wave 2 Back Pain/Depression Study, CATs were administered to N=417 cases assessed across 11 PROMIS domains. Original CAT administration rules were: start with a pre-identified item of moderate difficulty; administer a minimum four items per case; stop when an estimated theta{\textquoteright}s SE declines to < 0.3 OR a maximum 12 items are administered.
Original CAT. 12,622 CAT administrations were analyzed. CATs ranged in number of items administered from 4 to 12 items; 72.5\% were 4-item CATs. The second and third most frequently occurring CATs were 5-item (n=1102; 8.7\%) and 12-item CATs (n=964; 7.6\%). 64,062 items total were administered, averaging 5.1 items per CAT. Customized CAT. Three new CAT stopping rules were introduced, each with potential to increase item-presentation efficiency and maintain required score precision: Stop if a case responds to the first two items administered using an {\textquotedblleft}extreme{\textquotedblright} response category (towards the ceiling or floor for the in item bank, or at ); administer a minimum two items per case; stop if the change in SE estimate (previous to current item administration) is positive but < 0.01.
The three new stopping rules reduced the total number of items administered by 25,643 to 38,419 items (40.0\% reduction). After four items were administered, only n=1,824 CATs (14.5\%) were still in assessment mode (vs. n=3,477 (27.5\%) in the original CATs). On average, cases completed 3.0 items per CAT (vs. 5.1).
Each new rule addressed specific inefficiencies in the original CAT administration process: Cases not having or possessing a low/clinically unimportant level of the assessed domain; allow the SE <0.3 stopping criterion to come into effect earlier in the CAT administration process; cases experiencing poor domain item bank measurement, (e.g., {\textquotedblleft}floor,{\textquotedblright} {\textquotedblleft}ceiling{\textquotedblright} cases).
}, keywords = {Administration Rules, Efficiency, FastCAT}, url = {https://drive.google.com/open?id=1oPJV-x0p9hRmgJ7t6k-MCC1nAoBSFM1w}, author = {Richard C. Gershon} } @conference {2631, title = {Generating Rationales to Support Formative Feedback in Adaptive Testing}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {Computer adaptive testing offers many important benefits to support and promote life-long learning. Computers permit testing on-demand thereby allowing students to take the test at any time during instruction; items on computerized tests are scored immediately thereby providing students with instant feedback; computerized tests permit continuous administration thereby allowing students to have more choice about when they write their exams. But despite these important benefits, the advent of computer adaptive testing has also raised formidable challenges, particularly in the area of item development. Educators must have access to large numbers of diverse, high-quality test items to implement computerized adaptive testing because items are continuously administered to students. Hence, hundreds or even thousands of items are needed to develop the test item banks necessary for computer adaptive testing. Unfortunately, educational test items, as they are currently created, are time consuming and expensive to develop because each individual item is written, initially, by a content specialist and, then, reviewed, edited, and revised by groups of content specialists to ensure the items yield reliable and valid information. Hence, item development is one of the most important problems that must be solved before we can migrate to computer adaptive testing to support life-long learning because large numbers of high-quality, content-specific, test items are required.
One promising item development method that may be used to address this challenge is with automatic item generation. Automatic item generation is a relatively new but rapidly evolving research area where cognitive and psychometric modelling practices are used to produce hundreds of new test items with the aid of computer technology. The purpose of our presentation is to describe a new methodology for generating both the items and the rationales required to solve each generated item in order to produce the feedback needed to support life-long learning. Our item generation methodology will first be described. To ensure our description is practical, the method will also be demonstrated using generated items from the health sciences to demonstrate how item generation can promote life-long learning for medical educators and practitioners.
}, keywords = {Adaptive Testing, formative feedback, Item generation}, url = {https://drive.google.com/open?id=1O5KDFtQlDLvhNoDr7X4JO4arpJkIHKUP}, author = {Mark Gierl and Okan Bulut} } @conference {2080, title = {Adaptive Item Calibration and Norming: Unique Considerations of a Global Deployment}, booktitle = {Annual Conference of the International Association for Computerized Adaptive Testing}, year = {2011}, month = {10/2011}, keywords = {CAT, common item equating, Figural Reasoning Test, item calibration, norming}, author = {Alexander Schwall and Evan Sinar} } @article {46, title = {Development and validation of patient-reported outcome measures for sleep disturbance and sleep-related impairments}, journal = {Sleep}, volume = {33}, number = {6}, year = {2010}, note = {Buysse, Daniel JYu, LanMoul, Douglas EGermain, AnneStover, AngelaDodds, Nathan EJohnston, Kelly LShablesky-Cade, Melissa APilkonis, Paul AAR052155/AR/NIAMS NIH HHS/United StatesU01AR52155/AR/NIAMS NIH HHS/United StatesU01AR52158/AR/NIAMS NIH HHS/United StatesU01AR52170/AR/NIAMS NIH HHS/United StatesU01AR52171/AR/NIAMS NIH HHS/United StatesU01AR52177/AR/NIAMS NIH HHS/United StatesU01AR52181/AR/NIAMS NIH HHS/United StatesU01AR52186/AR/NIAMS NIH HHS/United StatesResearch Support, N.I.H., ExtramuralValidation StudiesUnited StatesSleepSleep. 2010 Jun 1;33(6):781-92.}, month = {Jun 1}, pages = {781-92}, edition = {2010/06/17}, abstract = {STUDY OBJECTIVES: To develop an archive of self-report questions assessing sleep disturbance and sleep-related impairments (SRI), to develop item banks from this archive, and to validate and calibrate the item banks using classic validation techniques and item response theory analyses in a sample of clinical and community participants. DESIGN: Cross-sectional self-report study. SETTING: Academic medical center and participant homes. 
PARTICIPANTS: One thousand nine hundred ninety-three adults recruited from an Internet polling sample and 259 adults recruited from medical, psychiatric, and sleep clinics. INTERVENTIONS: None. MEASUREMENTS AND RESULTS: This study was part of PROMIS (Patient-Reported Outcomes Information System), a National Institutes of Health Roadmap initiative. Self-report item banks were developed through an iterative process of literature searches, collecting and sorting items, expert content review, qualitative patient research, and pilot testing. Internal consistency, convergent validity, and exploratory and confirmatory factor analysis were examined in the resulting item banks. Factor analyses identified 2 preliminary item banks, sleep disturbance and SRI. Item response theory analyses and expert content review narrowed the item banks to 27 and 16 items, respectively. Validity of the item banks was supported by moderate to high correlations with existing scales and by significant differences in sleep disturbance and SRI scores between participants with and without sleep disorders. CONCLUSIONS: The PROMIS sleep disturbance and SRI item banks have excellent measurement properties and may prove to be useful for assessing general aspects of sleep and SRI with various groups of patients and interventions.}, keywords = {*Outcome Assessment (Health Care), *Self Disclosure, Adult, Aged, Aged, 80 and over, Cross-Sectional Studies, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Psychometrics, Questionnaires, Reproducibility of Results, Sleep Disorders/*diagnosis, Young Adult}, isbn = {0161-8105 (Print)0161-8105 (Linking)}, author = {Buysse, D. J. and Yu, L. and Moul, D. E. and Germain, A. and Stover, A. and Dodds, N. E. and Johnston, K. L. and Shablesky-Cade, M. A. and Pilkonis, P. 
A.} } @article {7, title = {Development and preliminary testing of a computerized adaptive assessment of chronic pain}, journal = {Journal of Pain}, volume = {10}, number = {9}, year = {2009}, note = {Anatchkova, Milena DSaris-Baglama, Renee NKosinski, MarkBjorner, Jakob B1R43AR052251-01A1/AR/NIAMS NIH HHS/United StatesEvaluation StudiesResearch Support, N.I.H., ExtramuralUnited StatesThe journal of pain : official journal of the American Pain SocietyJ Pain. 2009 Sep;10(9):932-43.}, month = {Sep}, pages = {932-943}, edition = {2009/07/15}, abstract = {The aim of this article is to report the development and preliminary testing of a prototype computerized adaptive test of chronic pain (CHRONIC PAIN-CAT) conducted in 2 stages: (1) evaluation of various item selection and stopping rules through real data-simulated administrations of CHRONIC PAIN-CAT; (2) a feasibility study of the actual prototype CHRONIC PAIN-CAT assessment system conducted in a pilot sample. Item calibrations developed from a US general population sample (N = 782) were used to program a pain severity and impact item bank (kappa = 45), and real data simulations were conducted to determine a CAT stopping rule. The CHRONIC PAIN-CAT was programmed on a tablet PC using QualityMetric{\textquoteright}s Dynamic Health Assessment (DYHNA) software and administered to a clinical sample of pain sufferers (n = 100). The CAT was completed in significantly less time than the static (full item bank) assessment (P < .001). On average, 5.6 items were dynamically administered by CAT to achieve a precise score. Scores estimated from the 2 assessments were highly correlated (r = .89), and both assessments discriminated across pain severity levels (P < .001, RV = .95). Patients{\textquoteright} evaluations of the CHRONIC PAIN-CAT were favorable. PERSPECTIVE: This report demonstrates that the CHRONIC PAIN-CAT is feasible for administration in a clinic. 
The application has the potential to improve pain assessment and help clinicians manage chronic pain.}, keywords = {*Computers, *Questionnaires, Activities of Daily Living, Adaptation, Psychological, Chronic Disease, Cohort Studies, Disability Evaluation, Female, Humans, Male, Middle Aged, Models, Psychological, Outcome Assessment (Health Care), Pain Measurement/*methods, Pain, Intractable/*diagnosis/psychology, Psychometrics, Quality of Life, User-Computer Interface}, isbn = {1528-8447 (Electronic)1526-5900 (Linking)}, author = {Anatchkova, M. D. and Saris-Baglama, R. N. and Kosinski, M. and Bjorner, J. B.} } @article {138, title = {Development of an item bank for the assessment of depression in persons with mental illnesses and physical diseases using Rasch analysis}, journal = {Rehabilitation Psychology}, volume = {54}, number = {2}, year = {2009}, note = {Forkmann, ThomasBoecker, MarenNorra, ChristineEberle, NicoleKircher, TiloSchauerte, PatrickMischke, KarlWesthofen, MartinGauggel, SiegfriedWirtz, MarkusResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesRehabilitation psychologyRehabil Psychol. 2009 May;54(2):186-97.}, month = {May}, pages = {186-97}, edition = {2009/05/28}, abstract = {OBJECTIVE: The calibration of item banks provides the basis for computerized adaptive testing that ensures high diagnostic precision and minimizes participants{\textquoteright} test burden. The present study aimed at developing a new item bank that allows for assessing depression in persons with mental and persons with somatic diseases. METHOD: The sample consisted of 161 participants treated for a depressive syndrome, and 206 participants with somatic illnesses (103 cardiologic, 103 otorhinolaryngologic; overall mean age = 44.1 years, SD =14.0; 44.7\% women) to allow for validation of the item bank in both groups. Persons answered a pool of 182 depression items on a 5-point Likert scale. 
RESULTS: Evaluation of Rasch model fit (infit < 1.3), differential item functioning, dimensionality, local independence, item spread, item and person separation (>2.0), and reliability (>.80) resulted in a bank of 79 items with good psychometric properties. CONCLUSIONS: The bank provides items with a wide range of content coverage and may serve as a sound basis for computerized adaptive testing applications. It might also be useful for researchers who wish to develop new fixed-length scales for the assessment of depression in specific rehabilitation settings.}, keywords = {Adaptation, Psychological, Adult, Aged, Depressive Disorder/*diagnosis/psychology, Diagnosis, Computer-Assisted, Female, Heart Diseases/*psychology, Humans, Male, Mental Disorders/*psychology, Middle Aged, Models, Statistical, Otorhinolaryngologic Diseases/*psychology, Personality Assessment/statistics \& numerical data, Personality Inventory/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Questionnaires, Reproducibility of Results, Sick Role}, isbn = {0090-5550 (Print)0090-5550 (Linking)}, author = {Forkmann, T. and Boecker, M. and Norra, C. and Eberle, N. and Kircher, T. and Schauerte, P. and Mischke, K. and Westhofen, M. and Gauggel, S. and Wirtz, M.} } @article {227, title = {An evaluation of patient-reported outcomes found computerized adaptive testing was efficient in assessing stress perception}, journal = {Journal of Clinical Epidemiology}, volume = {62}, number = {3}, year = {2009}, note = {Kocalevent, Ruya-DanielaRose, MatthiasBecker, JanineWalter, Otto BFliege, HerbertBjorner, Jakob BKleiber, DieterKlapp, Burghard FEvaluation StudiesUnited StatesJournal of clinical epidemiologyJ Clin Epidemiol. 2009 Mar;62(3):278-87, 287.e1-3. 
Epub 2008 Jul 18.}, pages = {278-287}, edition = {2008/07/22}, abstract = {OBJECTIVES: This study aimed to develop and evaluate a first computerized adaptive test (CAT) for the measurement of stress perception (Stress-CAT), in terms of the two dimensions: exposure to stress and stress reaction. STUDY DESIGN AND SETTING: Item response theory modeling was performed using a two-parameter model (Generalized Partial Credit Model). The evaluation of the Stress-CAT comprised a simulation study and real clinical application. A total of 1,092 psychosomatic patients (N1) were studied. Two hundred simulees (N2) were generated for a simulated response data set. Then the Stress-CAT was given to n=116 inpatients, (N3) together with established stress questionnaires as validity criteria. RESULTS: The final banks included n=38 stress exposure items and n=31 stress reaction items. In the first simulation study, CAT scores could be estimated with a high measurement precision (SE<0.32; rho>0.90) using 7.0+/-2.3 (M+/-SD) stress reaction items and 11.6+/-1.7 stress exposure items. The second simulation study reanalyzed real patients data (N1) and showed an average use of items of 5.6+/-2.1 for the dimension stress reaction and 10.0+/-4.9 for the dimension stress exposure. Convergent validity showed significantly high correlations. CONCLUSIONS: The Stress-CAT is short and precise, potentially lowering the response burden of patients in clinical decision making.}, keywords = {*Diagnosis, Computer-Assisted, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Female, Humans, Male, Middle Aged, Perception, Quality of Health Care/*standards, Questionnaires, Reproducibility of Results, Sickness Impact Profile, Stress, Psychological/*diagnosis/psychology, Treatment Outcome}, isbn = {1878-5921 (Electronic)0895-4356 (Linking)}, author = {Kocalevent, R. D. and Rose, M. and Becker, J. and Walter, O. B. and Fliege, H. and Bjorner, J. B. and Kleiber, D. and Klapp, B. 
F.} } @article {170, title = {Measuring global physical health in children with cerebral palsy: Illustration of a multidimensional bi-factor model and computerized adaptive testing}, journal = {Quality of Life Research}, volume = {18}, number = {3}, year = {2009}, note = {Haley, Stephen MNi, PengshengDumas, Helene MFragala-Pinkham, Maria AHambleton, Ronald KMontpetit, KathleenBilodeau, NathalieGorton, George EWatson, KyleTucker, Carole AK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesK02 HD45354-01A1/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}tNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2009 Apr;18(3):359-70. Epub 2009 Feb 17.}, month = {Apr}, pages = {359-370}, edition = {2009/02/18}, abstract = {PURPOSE: The purposes of this study were to apply a bi-factor model for the determination of test dimensionality and a multidimensional CAT using computer simulations of real data for the assessment of a new global physical health measure for children with cerebral palsy (CP). METHODS: Parent respondents of 306 children with cerebral palsy were recruited from four pediatric rehabilitation hospitals and outpatient clinics. We compared confirmatory factor analysis results across four models: (1) one-factor unidimensional; (2) two-factor multidimensional (MIRT); (3) bi-factor MIRT with fixed slopes; and (4) bi-factor MIRT with varied slopes. We tested whether the general and content (fatigue and pain) person score estimates could discriminate across severity and types of CP, and whether score estimates from a simulated CAT were similar to estimates based on the total item bank, and whether they correlated as expected with external measures. RESULTS: Confirmatory factor analysis suggested separate pain and fatigue sub-factors; all 37 items were retained in the analyses. 
From the bi-factor MIRT model with fixed slopes, the full item bank scores discriminated across levels of severity and types of CP, and compared favorably to external instruments. CAT scores based on 10- and 15-item versions accurately captured the global physical health scores. CONCLUSIONS: The bi-factor MIRT CAT application, especially the 10- and 15-item versions, yielded accurate global physical health scores that discriminated across known severity groups and types of CP, and correlated as expected with concurrent measures. The CATs have potential for collecting complex data on the physical health of children with CP in an efficient manner.}, keywords = {*Computer Simulation, *Health Status, *Models, Statistical, Adaptation, Psychological, Adolescent, Cerebral Palsy/*physiopathology, Child, Child, Preschool, Factor Analysis, Statistical, Female, Humans, Male, Massachusetts, Pennsylvania, Questionnaires, Young Adult}, isbn = {0962-9343 (Print)0962-9343 (Linking)}, author = {Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A. and Hambleton, R. K. and Montpetit, K. and Bilodeau, N. and Gorton, G. E. and Watson, K. and Tucker, C. A.} } @article {78, title = {Reduction in patient burdens with graphical computerized adaptive testing on the ADL scale: tool development and simulation}, journal = {Health and Quality of Life Outcomes}, volume = {7}, year = {2009}, note = {Chien, Tsair-WeiWu, Hing-ManWang, Weng-ChungCastillo, Roberto VasquezChou, WillyComparative StudyValidation StudiesEnglandHealth and quality of life outcomesHealth Qual Life Outcomes. 2009 May 5;7:39.}, pages = {39}, edition = {2009/05/07}, abstract = {BACKGROUND: The aim of this study was to verify the effectiveness and efficacy of saving time and reducing burden for patients, nurses, and even occupational therapists through computer adaptive testing (CAT). 
METHODS: Based on an item bank of the Barthel Index (BI) and the Frenchay Activities Index (FAI) for assessing comprehensive activities of daily living (ADL) function in stroke patients, we developed a visual basic application (VBA)-Excel CAT module, and (1) investigated whether the averaged test length via CAT is shorter than that of the traditional all-item-answered non-adaptive testing (NAT) approach through simulation, (2) illustrated the CAT multimedia on a tablet PC showing data collection and response errors of ADL clinical functional measures in stroke patients, and (3) demonstrated the quality control of endorsing scale with fit statistics to detect responding errors, which will be further immediately reconfirmed by technicians once patient ends the CAT assessment. RESULTS: The results show that endorsed items could be shorter on CAT (M = 13.42) than on NAT (M = 23) at 41.64\% efficiency in test length. However, averaged ability estimations reveal insignificant differences between CAT and NAT. CONCLUSION: This study found that mobile nursing services, placed at the bedsides of patients could, through the programmed VBA-Excel CAT module, reduce the burden to patients and save time, more so than the traditional NAT paper-and-pencil testing appraisals.}, keywords = {*Activities of Daily Living, *Computer Graphics, *Computer Simulation, *Diagnosis, Computer-Assisted, Female, Humans, Male, Point-of-Care Systems, Reproducibility of Results, Stroke/*rehabilitation, Taiwan, United States}, isbn = {1477-7525 (Electronic)1477-7525 (Linking)}, author = {Chien, T. W. and Wu, H. M. and Wang, W-C. and Castillo, R. V. 
and Chou, W.} } @article {173, title = {Replenishing a computerized adaptive test of patient-reported daily activity functioning}, journal = {Quality of Life Research}, volume = {18}, number = {4}, year = {2009}, note = {Haley, Stephen MNi, PengshengJette, Alan MTao, WeiMoed, RichardMeyers, DougLudlow, Larry HK02 HD45354-01/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2009 May;18(4):461-71. Epub 2009 Mar 14.}, month = {May}, pages = {461-471}, edition = {2009/03/17}, abstract = {PURPOSE: Computerized adaptive testing (CAT) item banks may need to be updated, but before new items can be added, they must be linked to the previous CAT. The purpose of this study was to evaluate 41 pretest items prior to including them into an operational CAT. METHODS: We recruited 6,882 patients with spine, lower extremity, upper extremity, and nonorthopedic impairments who received outpatient rehabilitation in one of 147 clinics across 13 states of the USA. Forty-one new Daily Activity (DA) items were administered along with the Activity Measure for Post-Acute Care Daily Activity CAT (DA-CAT-1) in five separate waves. We compared the scoring consistency with the full item bank, test information function (TIF), person standard errors (SEs), and content range of the DA-CAT-1 to the new CAT (DA-CAT-2) with the pretest items by real data simulations. RESULTS: We retained 29 of the 41 pretest items. Scores from the DA-CAT-2 were more consistent (ICC = 0.90 versus 0.96) than DA-CAT-1 when compared with the full item bank. TIF and person SEs were improved for persons with higher levels of DA functioning, and ceiling effects were reduced from 16.1\% to 6.1\%. 
CONCLUSIONS: Item response theory and online calibration methods were valuable in improving the DA-CAT.}, keywords = {*Activities of Daily Living, *Disability Evaluation, *Questionnaires, *User-Computer Interface, Adult, Aged, Cohort Studies, Computer-Assisted Instruction, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods}, issn = {0962-9343}, author = {Haley, S. M. and Ni, P. and Jette, A. M. and Tao, W. and Moed, R. and Meyers, D. and Ludlow, L. H.} } @article {212, title = {Adaptive short forms for outpatient rehabilitation outcome assessment}, journal = {American Journal of Physical Medicine and Rehabilitation}, volume = {87}, number = {10}, year = {2008}, note = {Jette, Alan MHaley, Stephen MNi, PengshengMoed, RichardK02 HD45354-01/HD/NICHD NIH HHS/United StatesR01 HD43568/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, U.S. Gov{\textquoteright}t, Non-P.H.S.Research Support, U.S. Gov{\textquoteright}t, P.H.S.United StatesAmerican journal of physical medicine \& rehabilitation / Association of Academic PhysiatristsAm J Phys Med Rehabil. 2008 Oct;87(10):842-52.}, month = {Oct}, pages = {842-52}, edition = {2008/09/23}, abstract = {OBJECTIVE: To develop outpatient Adaptive Short Forms for the Activity Measure for Post-Acute Care item bank for use in outpatient therapy settings. DESIGN: A convenience sample of 11,809 adults with spine, lower limb, upper limb, and miscellaneous orthopedic impairments who received outpatient rehabilitation in 1 of 127 outpatient rehabilitation clinics in the United States. We identified optimal items for use in developing outpatient Adaptive Short Forms based on the Basic Mobility and Daily Activities domains of the Activity Measure for Post-Acute Care item bank. Patient scores were derived from the Activity Measure for Post-Acute Care computerized adaptive testing program. 
Items were selected for inclusion on the Adaptive Short Forms based on functional content, range of item coverage, measurement precision, item exposure rate, and data collection burden. RESULTS: Two outpatient Adaptive Short Forms were developed: (1) an 18-item Basic Mobility Adaptive Short Form and (2) a 15-item Daily Activities Adaptive Short Form, derived from the same item bank used to develop the Activity Measure for Post-Acute Care computerized adaptive testing program. Both Adaptive Short Forms achieved acceptable psychometric properties. CONCLUSIONS: In outpatient postacute care settings where computerized adaptive testing outcome applications are currently not feasible, item response theory-derived Adaptive Short Forms provide the efficient capability to monitor patients{\textquoteright} functional outcomes. The development of Adaptive Short Form functional outcome instruments linked by a common, calibrated item bank has the potential to create a bridge to outcome monitoring across postacute care settings and can facilitate the eventual transformation from Adaptive Short Forms to computerized adaptive testing applications easier and more acceptable to the rehabilitation community.}, keywords = {*Activities of Daily Living, *Ambulatory Care Facilities, *Mobility Limitation, *Treatment Outcome, Disabled Persons/psychology/*rehabilitation, Female, Humans, Male, Middle Aged, Questionnaires, Rehabilitation Centers}, isbn = {1537-7385 (Electronic)}, author = {Jette, A. M. and Haley, S. M. and Ni, P. 
and Moed, R.} } @article {88, title = {Assessing self-care and social function using a computer adaptive testing version of the pediatric evaluation of disability inventory}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {89}, number = {4}, year = {2008}, note = {Coster, Wendy JHaley, Stephen MNi, PengshengDumas, Helene MFragala-Pinkham, Maria AK02 HD45354-01A1/HD/NICHD NIH HHS/United StatesR41 HD052318-01A1/HD/NICHD NIH HHS/United StatesR43 HD42388-01/HD/NICHD NIH HHS/United StatesComparative StudyResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2008 Apr;89(4):622-9.}, month = {Apr}, pages = {622-629}, edition = {2008/04/01}, abstract = {OBJECTIVE: To examine score agreement, validity, precision, and response burden of a prototype computer adaptive testing (CAT) version of the self-care and social function scales of the Pediatric Evaluation of Disability Inventory compared with the full-length version of these scales. DESIGN: Computer simulation analysis of cross-sectional and longitudinal retrospective data; cross-sectional prospective study. SETTING: Pediatric rehabilitation hospital, including inpatient acute rehabilitation, day school program, outpatient clinics; community-based day care, preschool, and children{\textquoteright}s homes. PARTICIPANTS: Children with disabilities (n=469) and 412 children with no disabilities (analytic sample); 38 children with disabilities and 35 children without disabilities (cross-validation sample). INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from prototype CAT applications of each scale using 15-, 10-, and 5-item stopping rules; scores from the full-length self-care and social function scales; time (in seconds) to complete assessments and respondent ratings of burden. 
RESULTS: Scores from both computer simulations and field administration of the prototype CATs were highly consistent with scores from full-length administration (r range, .94-.99). Using computer simulation of retrospective data, discriminant validity, and sensitivity to change of the CATs closely approximated that of the full-length scales, especially when the 15- and 10-item stopping rules were applied. In the cross-validation study the time to administer both CATs was 4 minutes, compared with over 16 minutes to complete the full-length scales. CONCLUSIONS: Self-care and social function score estimates from CAT administration are highly comparable with those obtained from full-length scale administration, with small losses in validity and precision and substantial decreases in administration time.}, keywords = {*Disability Evaluation, *Social Adjustment, Activities of Daily Living, Adolescent, Age Factors, Child, Child, Preschool, Computer Simulation, Cross-Over Studies, Disabled Children/*rehabilitation, Female, Follow-Up Studies, Humans, Infant, Male, Outcome Assessment (Health Care), Reference Values, Reproducibility of Results, Retrospective Studies, Risk Factors, Self Care/*standards/trends, Sex Factors, Sickness Impact Profile}, isbn = {1532-821X (Electronic)0003-9993 (Linking)}, author = {Coster, W. J. and Haley, S. M. and Ni, P. and Dumas, H. M. and Fragala-Pinkham, M. A.} } @article {169, title = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: II. 
Participation outcomes}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {89}, number = {2}, year = {2008}, note = {Haley, Stephen MGandek, BarbaraSiebens, HilaryBlack-Schaffer, Randie MSinclair, Samuel JTao, WeiCoster, Wendy JNi, PengshengJette, Alan MK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesK02 HD45354-01/HD/NICHD NIH HHS/United StatesR01 HD043568/HD/NICHD NIH HHS/United StatesR01 HD043568-01/HD/NICHD NIH HHS/United StatesResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2008 Feb;89(2):275-83.}, month = {Feb}, pages = {275-283}, edition = {2008/01/30}, abstract = {OBJECTIVES: To measure participation outcomes with a computerized adaptive test (CAT) and compare CAT and traditional fixed-length surveys in terms of score agreement, respondent burden, discriminant validity, and responsiveness. DESIGN: Longitudinal, prospective cohort study of patients interviewed approximately 2 weeks after discharge from inpatient rehabilitation and 3 months later. SETTING: Follow-up interviews conducted in patient{\textquoteright}s home setting. PARTICIPANTS: Adults (N=94) with diagnoses of neurologic, orthopedic, or medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Participation domains of mobility, domestic life, and community, social, \& civic life, measured using a CAT version of the Participation Measure for Postacute Care (PM-PAC-CAT) and a 53-item fixed-length survey (PM-PAC-53). RESULTS: The PM-PAC-CAT showed substantial agreement with PM-PAC-53 scores (intraclass correlation coefficient, model 3,1, .71-.81). On average, the PM-PAC-CAT was completed in 42\% of the time and with only 48\% of the items as compared with the PM-PAC-53. Both formats discriminated across functional severity groups. 
The PM-PAC-CAT had modest reductions in sensitivity and responsiveness to patient-reported change over a 3-month interval as compared with the PM-PAC-53. CONCLUSIONS: Although continued evaluation is warranted, accurate estimates of participation status and responsiveness to change for group-level analyses can be obtained from CAT administrations, with a sizeable reduction in respondent burden.}, keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards}, isbn = {1532-821X (Electronic)0003-9993 (Linking)}, author = {Haley, S. M. and Gandek, B. and Siebens, H. and Black-Schaffer, R. M. and Sinclair, S. J. and Tao, W. and Coster, W. J. and Ni, P. and Jette, A. M.} } @article {231, title = {Computerized adaptive testing in back pain: Validation of the CAT-5D-QOL}, journal = {Spine}, volume = {33}, number = {12}, year = {2008}, note = {Kopec, Jacek ABadii, MaziarMcKenna, MarioLima, Viviane DSayre, Eric CDvorak, MarcelResearch Support, Non-U.S. Gov{\textquoteright}tValidation StudiesUnited StatesSpineSpine (Phila Pa 1976). 2008 May 20;33(12):1384-90.}, month = {May 20}, pages = {1384-90}, edition = {2008/05/23}, abstract = {STUDY DESIGN: We have conducted an outcome instrument validation study. OBJECTIVE: Our objective was to develop a computerized adaptive test (CAT) to measure 5 domains of health-related quality of life (HRQL) and assess its feasibility, reliability, validity, and efficiency. SUMMARY OF BACKGROUND DATA: Kopec and colleagues have recently developed item response theory based item banks for 5 domains of HRQL relevant to back pain and suitable for CAT applications. 
The domains are Daily Activities (DAILY), Walking (WALK), Handling Objects (HAND), Pain or Discomfort (PAIN), and Feelings (FEEL). METHODS: An adaptive algorithm was implemented in a web-based questionnaire administration system. The questionnaire included CAT-5D-QOL (5 scales), Modified Oswestry Disability Index (MODI), Roland-Morris Disability Questionnaire (RMDQ), SF-36 Health Survey, and standard clinical and demographic information. Participants were outpatients treated for mechanical back pain at a referral center in Vancouver, Canada. RESULTS: A total of 215 patients completed the questionnaire and 84 completed a retest. On average, patients answered 5.2 items per CAT-5D-QOL scale. Reliability ranged from 0.83 (FEEL) to 0.92 (PAIN) and was 0.92 for the MODI, RMDQ, and Physical Component Summary (PCS-36). The ceiling effect was 0.5\% for PAIN compared with 2\% for MODI and 5\% for RMQ. The CAT-5D-QOL scales correlated as anticipated with other measures of HRQL and discriminated well according to the level of satisfaction with current symptoms, duration of the last episode, sciatica, and disability compensation. The average relative discrimination index was 0.87 for PAIN, 0.67 for DAILY and 0.62 for WALK, compared with 0.89 for MODI, 0.80 for RMDQ, and 0.59 for PCS-36. CONCLUSION: The CAT-5D-QOL is feasible, reliable, valid, and efficient in patients with back pain. This methodology can be recommended for use in back pain research and should improve outcome assessment, facilitate comparisons across studies, and reduce patient burden.}, keywords = {*Disability Evaluation, *Health Status Indicators, *Quality of Life, Adult, Aged, Algorithms, Back Pain/*diagnosis/psychology, British Columbia, Diagnosis, Computer-Assisted/*standards, Feasibility Studies, Female, Humans, Internet, Male, Middle Aged, Predictive Value of Tests, Questionnaires/*standards, Reproducibility of Results}, isbn = {1528-1159 (Electronic)0362-2436 (Linking)}, author = {Kopec, J. A. 
and Badii, M. and McKenna, M. and Lima, V. D. and Sayre, E. C. and Dvorak, M.} } @article {287, title = {Measuring physical functioning in children with spinal impairments with computerized adaptive testing}, journal = {Journal of Pediatric Orthopedics}, volume = {28}, number = {3}, year = {2008}, note = {Mulcahey, M JHaley, Stephen MDuffy, TheresaPengsheng, NiBetz, Randal RK02 HD045354-01A1/HD/NICHD NIH HHS/United StatesUnited StatesJournal of pediatric orthopedicsJ Pediatr Orthop. 2008 Apr-May;28(3):330-5.}, month = {Apr-May}, pages = {330-335}, edition = {2008/03/26}, abstract = {BACKGROUND: The purpose of this study was to assess the utility of measuring current physical functioning status of children with scoliosis and kyphosis by applying computerized adaptive testing (CAT) methods. Computerized adaptive testing uses a computer interface to administer the most optimal items based on previous responses, reducing the number of items needed to obtain a scoring estimate. METHODS: This was a prospective study of 77 subjects (0.6-19.8 years) who were seen by a spine surgeon during a routine clinic visit for progress spine deformity. Using a multidimensional version of the Pediatric Evaluation of Disability Inventory CAT program (PEDI-MCAT), we evaluated content range, accuracy and efficiency, known-group validity, concurrent validity with the Pediatric Outcomes Data Collection Instrument, and test-retest reliability in a subsample (n = 16) within a 2-week interval. RESULTS: We found the PEDI-MCAT to have sufficient item coverage in both self-care and mobility content for this sample, although most patients tended to score at the higher ends of both scales. Both the accuracy of PEDI-MCAT scores as compared with a fixed format of the PEDI (r = 0.98 for both mobility and self-care) and test-retest reliability were very high [self-care: intraclass correlation (3,1) = 0.98, mobility: intraclass correlation (3,1) = 0.99]. 
The PEDI-MCAT took an average of 2.9 minutes for the parents to complete. The PEDI-MCAT detected expected differences between patient groups, and scores on the PEDI-MCAT correlated in expected directions with scores from the Pediatric Outcomes Data Collection Instrument domains. CONCLUSIONS: Use of the PEDI-MCAT to assess the physical functioning status, as perceived by parents of children with complex spinal impairments, seems to be feasible and achieves accurate and efficient estimates of self-care and mobility function. Additional item development will be needed at the higher functioning end of the scale to avoid ceiling effects for older children. LEVEL OF EVIDENCE: This is a level II prospective study designed to establish the utility of computer adaptive testing as an evaluation method in a busy pediatric spine practice.}, keywords = {*Disability Evaluation, Adolescent, Child, Child, Preschool, Computer Simulation, Cross-Sectional Studies, Disabled Children/*rehabilitation, Female, Humans, Infant, Kyphosis/*diagnosis/rehabilitation, Male, Prospective Studies, Reproducibility of Results, Scoliosis/*diagnosis/rehabilitation}, issn = {0271-6798}, author = {Mulcahey, M. J. and Haley, S. M. and Duffy, T. and Ni, P. and Betz, R. R.} } @article {152, title = {Using computerized adaptive testing to reduce the burden of mental health assessment}, journal = {Psychiatric Services}, volume = {59}, number = {4}, year = {2008}, note = {Gibbons, Robert DWeiss, David JKupfer, David JFrank, EllenFagiolini, AndreaGrochocinski, Victoria JBhaumik, Dulal KStover, AngelaBock, R DarrellImmekus, Jason CR01-MH-30915/MH/United States NIMHR01-MH-66302/MH/United States NIMHResearch Support, N.I.H., ExtramuralUnited StatesPsychiatric services (Washington, D.C.)Psychiatr Serv. 
2008 Apr;59(4):361-8.}, month = {Apr}, pages = {361-8}, edition = {2008/04/02}, abstract = {OBJECTIVE: This study investigated the combination of item response theory and computerized adaptive testing (CAT) for psychiatric measurement as a means of reducing the burden of research and clinical assessments. METHODS: Data were from 800 participants in outpatient treatment for a mood or anxiety disorder; they completed 616 items of the 626-item Mood and Anxiety Spectrum Scales (MASS) at two times. The first administration was used to design and evaluate a CAT version of the MASS by using post hoc simulation. The second confirmed the functioning of CAT in live testing. RESULTS: Tests of competing models based on item response theory supported the scale{\textquoteright}s bifactor structure, consisting of a primary dimension and four group factors (mood, panic-agoraphobia, obsessive-compulsive, and social phobia). Both simulated and live CAT showed a 95\% average reduction (585 items) in items administered (24 and 30 items, respectively) compared with administration of the full MASS. The correlation between scores on the full MASS and the CAT version was .93. For the mood disorder subscale, differences in scores between two groups of depressed patients--one with bipolar disorder and one without--on the full scale and on the CAT showed effect sizes of .63 (p<.003) and 1.19 (p<.001) standard deviation units, respectively, indicating better discriminant validity for CAT. 
CONCLUSIONS: Instead of using small fixed-length tests, clinicians can create item banks with a large item pool, and a small set of the items most relevant for a given individual can be administered with no loss of information, yielding a dramatic reduction in administration time and patient and clinician burden.}, keywords = {*Diagnosis, Computer-Assisted, *Questionnaires, Adolescent, Adult, Aged, Agoraphobia/diagnosis, Anxiety Disorders/diagnosis, Bipolar Disorder/diagnosis, Female, Humans, Male, Mental Disorders/*diagnosis, Middle Aged, Mood Disorders/diagnosis, Obsessive-Compulsive Disorder/diagnosis, Panic Disorder/diagnosis, Phobic Disorders/diagnosis, Reproducibility of Results, Time Factors}, isbn = {1075-2730 (Print)}, author = {Gibbons, R. D. and Weiss, D. J. and Kupfer, D. J. and Frank, E. and Fagiolini, A. and Grochocinski, V. J. and Bhaumik, D. K. and Stover, A. and Bock, R. D. and Immekus, J. C.} } @article {135, title = {Computerized adaptive personality testing: A review and illustration with the MMPI-2 Computerized Adaptive Version}, journal = {Psychological Assessment}, volume = {19}, number = {1}, year = {2007}, note = {Forbey, Johnathan DBen-Porath, Yossef SResearch Support, Non-U.S. Gov{\textquoteright}tUnited StatesPsychological assessmentPsychol Assess. 2007 Mar;19(1):14-24.}, month = {Mar}, pages = {14-24}, edition = {2007/03/21}, abstract = {Computerized adaptive testing in personality assessment can improve efficiency by significantly reducing the number of items administered to answer an assessment question. Two approaches have been explored for adaptive testing in computerized personality assessment: item response theory and the countdown method. 
In this article, the authors review the literature on each and report the results of an investigation designed to explore the utility, in terms of item and time savings, and validity, in terms of correlations with external criterion measures, of an expanded countdown method-based research version of the Minnesota Multiphasic Personality Inventory-2 (MMPI-2), the MMPI-2 Computerized Adaptive Version (MMPI-2-CA). Participants were 433 undergraduate college students (170 men and 263 women). Results indicated considerable item savings and corresponding time savings for the adaptive testing modalities compared with a conventional computerized MMPI-2 administration. Furthermore, computerized adaptive administration yielded comparable results to computerized conventional administration of the MMPI-2 in terms of both test scores and their validity. Future directions for computerized adaptive personality testing are discussed.}, keywords = {Adolescent, Adult, Diagnosis, Computer-Assisted/*statistics \& numerical data, Female, Humans, Male, MMPI/*statistics \& numerical data, Personality Assessment/*statistics \& numerical data, Psychometrics/statistics \& numerical data, Reference Values, Reproducibility of Results}, isbn = {1040-3590 (Print)}, author = {Forbey, J. D. and Ben-Porath, Y. S.} } @article {29, title = {Developing tailored instruments: item banking and computerized adaptive assessment}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl 1}, year = {2007}, note = {Bjorner, Jakob BueChang, Chih-HungThissen, DavidReeve, Bryce B1R43NS047763-01/NS/United States NINDSAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:95-108. 
Epub 2007 Feb 15.}, pages = {95-108}, edition = {2007/05/29}, abstract = {Item banks and Computerized Adaptive Testing (CAT) have the potential to greatly improve the assessment of health outcomes. This review describes the unique features of item banks and CAT and discusses how to develop item banks. In CAT, a computer selects the items from an item bank that are most relevant for and informative about the particular respondent; thus optimizing test relevance and precision. Item response theory (IRT) provides the foundation for selecting the items that are most informative for the particular respondent and for scoring responses on a common metric. The development of an item bank is a multi-stage process that requires a clear definition of the construct to be measured, good items, a careful psychometric analysis of the items, and a clear specification of the final CAT. The psychometric analysis needs to evaluate the assumptions of the IRT model such as unidimensionality and local independence; that the items function the same way in different subgroups of the population; and that there is an adequate fit between the data and the chosen item response models. Also, interpretation guidelines need to be established to help the clinical application of the assessment. Although medical research can draw upon expertise from educational testing in the development of item banks and CAT, the medical field also encounters unique opportunities and challenges.}, keywords = {*Health Status, *Health Status Indicators, *Mental Health, *Outcome Assessment (Health Care), *Quality of Life, *Questionnaires, *Software, Algorithms, Factor Analysis, Statistical, Humans, Models, Statistical, Psychometrics}, isbn = {0962-9343 (Print)}, author = {Bjorner, J. B. and Chang, C-H. and Thissen, D. and Reeve, B. B.} } @article {86, title = {IRT health outcomes data analysis project: an overview and summary}, journal = {Quality of Life Research}, volume = {16}, number = {Suppl. 
1}, year = {2007}, note = {Cook, Karon FTeal, Cayla RBjorner, Jakob BCella, DavidChang, Chih-HungCrane, Paul KGibbons, Laura EHays, Ron DMcHorney, Colleen AOcepek-Welikson, KatjaRaczek, Anastasia ETeresi, Jeanne AReeve, Bryce B1U01AR52171-01/AR/United States NIAMSR01 (CA60068)/CA/United States NCIY1-PC-3028-01/PC/United States NCIResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2007;16 Suppl 1:121-32. Epub 2007 Mar 10.}, pages = {121-132}, edition = {2007/03/14}, abstract = {BACKGROUND: In June 2004, the National Cancer Institute and the Drug Information Association co-sponsored the conference, "Improving the Measurement of Health Outcomes through the Applications of Item Response Theory (IRT) Modeling: Exploration of Item Banks and Computer-Adaptive Assessment." A component of the conference was presentation of a psychometric and content analysis of a secondary dataset. OBJECTIVES: A thorough psychometric and content analysis was conducted of two primary domains within a cancer health-related quality of life (HRQOL) dataset. RESEARCH DESIGN: HRQOL scales were evaluated using factor analysis for categorical data, IRT modeling, and differential item functioning analyses. In addition, computerized adaptive administration of HRQOL item banks was simulated, and various IRT models were applied and compared. SUBJECTS: The original data were collected as part of the NCI-funded Quality of Life Evaluation in Oncology (Q-Score) Project. A total of 1,714 patients with cancer or HIV/AIDS were recruited from 5 clinical sites. MEASURES: Items from 4 HRQOL instruments were evaluated: Cancer Rehabilitation Evaluation System-Short Form, European Organization for Research and Treatment of Cancer Quality of Life Questionnaire, Functional Assessment of Cancer Therapy and Medical Outcomes Study Short-Form Health Survey. 
RESULTS AND CONCLUSIONS: Four lessons learned from the project are discussed: the importance of good developmental item banks, the ambiguity of model fit results, the limits of our knowledge regarding the practical implications of model misfit, and the importance in the measurement of HRQOL of construct definition. With respect to these lessons, areas for future research are suggested. The feasibility of developing item banks for broad definitions of health is discussed.}, keywords = {*Data Interpretation, Statistical, *Health Status, *Quality of Life, *Questionnaires, *Software, Female, HIV Infections/psychology, Humans, Male, Neoplasms/psychology, Outcome Assessment (Health Care)/*methods, Psychometrics, Stress, Psychological}, isbn = {0962-9343 (Print)}, author = {Cook, K. F. and Teal, C. R. and Bjorner, J. B. and Cella, D. and Chang, C-H. and Crane, P. K. and Gibbons, L. E. and Hays, R. D. and McHorney, C. A. and Ocepek-Welikson, K. and Raczek, A. E. and Teresi, J. A. and Reeve, B. B.} } @article {328, title = {Psychometric evaluation and calibration of health-related quality of life item banks: plans for the Patient-Reported Outcomes Measurement Information System (PROMIS)}, journal = {Medical Care}, volume = {45}, number = {5 Suppl 1}, year = {2007}, note = {Reeve, Bryce BHays, Ron DBjorner, Jakob BCook, Karon FCrane, Paul KTeresi, Jeanne AThissen, DavidRevicki, Dennis AWeiss, David JHambleton, Ronald KLiu, HonghuGershon, RichardReise, Steven PLai, Jin-sheiCella, DavidPROMIS Cooperative GroupAG015815/AG/United States NIAResearch Support, N.I.H., ExtramuralUnited StatesMedical careMed Care. 2007 May;45(5 Suppl 1):S22-31.}, month = {May}, pages = {S22-S31}, edition = {2007/04/20}, abstract = {BACKGROUND: The construction and evaluation of item banks to measure unidimensional constructs of health-related quality of life (HRQOL) is a fundamental objective of the Patient-Reported Outcomes Measurement Information System (PROMIS) project. 
OBJECTIVES: Item banks will be used as the foundation for developing short-form instruments and enabling computerized adaptive testing. The PROMIS Steering Committee selected 5 HRQOL domains for initial focus: physical functioning, fatigue, pain, emotional distress, and social role participation. This report provides an overview of the methods used in the PROMIS item analyses and proposed calibration of item banks. ANALYSES: Analyses include evaluation of data quality (eg, logic and range checking, spread of response distribution within an item), descriptive statistics (eg, frequencies, means), item response theory model assumptions (unidimensionality, local independence, monotonicity), model fit, differential item functioning, and item calibration for banking. RECOMMENDATIONS: Summarized are key analytic issues; recommendations are provided for future evaluations of item banks in HRQOL assessment.}, keywords = {*Health Status, *Information Systems, *Quality of Life, *Self Disclosure, Adolescent, Adult, Aged, Calibration, Databases as Topic, Evaluation Studies as Topic, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Psychometrics, Questionnaires/standards, United States}, issn = {0025-7079}, author = {Reeve, B. B. and Hays, R. D. and Bjorner, J. B. and Cook, K. F. and Crane, P. K. and Teresi, J. A. and Thissen, D. and Revicki, D. A. and Weiss, D. J. and Hambleton, R. K. and Liu, H. and Gershon, R. C. and Reise, S. P. and Lai, J. S. and Cella, D.} } @article {172, title = {Computer adaptive testing improved accuracy and precision of scores over random item selection in a physical functioning item bank}, journal = {Journal of Clinical Epidemiology}, volume = {59}, number = {11}, year = {2006}, note = {Haley, Stephen MNi, PengshengHambleton, Ronald KSlavin, Mary DJette, Alan MK02 hd45354-01/hd/nichdR01 hd043568/hd/nichdComparative StudyResearch Support, N.I.H., ExtramuralResearch Support, U.S. 
Gov{\textquoteright}t, Non-P.H.S.EnglandJournal of clinical epidemiologyJ Clin Epidemiol. 2006 Nov;59(11):1174-82. Epub 2006 Jul 11.}, month = {Nov}, pages = {1174-82}, edition = {2006/10/10}, abstract = {BACKGROUND AND OBJECTIVE: Measuring physical functioning (PF) within and across postacute settings is critical for monitoring outcomes of rehabilitation; however, most current instruments lack sufficient breadth and feasibility for widespread use. Computer adaptive testing (CAT), in which item selection is tailored to the individual patient, holds promise for reducing response burden, yet maintaining measurement precision. We calibrated a PF item bank via item response theory (IRT), administered items with a post hoc CAT design, and determined whether CAT would improve accuracy and precision of score estimates over random item selection. METHODS: 1,041 adults were interviewed during postacute care rehabilitation episodes in either hospital or community settings. Responses for 124 PF items were calibrated using IRT methods to create a PF item bank. We examined the accuracy and precision of CAT-based scores compared to a random selection of items. RESULTS: CAT-based scores had higher correlations with the IRT-criterion scores, especially with short tests, and resulted in narrower confidence intervals than scores based on a random selection of items; gains, as expected, were especially large for low and high performing adults. CONCLUSION: The CAT design may have important precision and efficiency advantages for point-of-care functional assessment in rehabilitation practice settings.}, keywords = {*Recovery of Function, Activities of Daily Living, Adolescent, Adult, Aged, Aged, 80 and over, Confidence Intervals, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Rehabilitation/*standards, Reproducibility of Results, Software}, isbn = {0895-4356 (Print)}, author = {Haley, S. M. and Ni, P. and Hambleton, R. K. 
and Slavin, M. D. and Jette, A. M.} } @article {176, title = {Computerized adaptive testing for follow-up after discharge from inpatient rehabilitation: I. Activity outcomes}, journal = {Archives of Physical Medicine and Rehabilitation}, volume = {87}, number = {8}, year = {2006}, note = {Haley, Stephen MSiebens, HilaryCoster, Wendy JTao, WeiBlack-Schaffer, Randie MGandek, BarbaraSinclair, Samuel JNi, PengshengK0245354-01/phsR01 hd043568/hd/nichdResearch Support, N.I.H., ExtramuralUnited StatesArchives of physical medicine and rehabilitationArch Phys Med Rehabil. 2006 Aug;87(8):1033-42.}, month = {Aug}, pages = {1033-42}, edition = {2006/08/01}, abstract = {OBJECTIVE: To examine score agreement, precision, validity, efficiency, and responsiveness of a computerized adaptive testing (CAT) version of the Activity Measure for Post-Acute Care (AM-PAC-CAT) in a prospective, 3-month follow-up sample of inpatient rehabilitation patients recently discharged home. DESIGN: Longitudinal, prospective 1-group cohort study of patients followed approximately 2 weeks after hospital discharge and then 3 months after the initial home visit. SETTING: Follow-up visits conducted in patients{\textquoteright} home setting. PARTICIPANTS: Ninety-four adults who were recently discharged from inpatient rehabilitation, with diagnoses of neurologic, orthopedic, and medically complex conditions. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Summary scores from AM-PAC-CAT, including 3 activity domains of movement and physical, personal care and instrumental, and applied cognition were compared with scores from a traditional fixed-length version of the AM-PAC with 66 items (AM-PAC-66). RESULTS: AM-PAC-CAT scores were in good agreement (intraclass correlation coefficient model 3,1 range, .77-.86) with scores from the AM-PAC-66. On average, the CAT programs required 43\% of the time and 33\% of the items compared with the AM-PAC-66. 
Both formats discriminated across functional severity groups. The standardized response mean (SRM) was greater for the movement and physical fixed form than the CAT; the effect size and SRM of the 2 other AM-PAC domains showed similar sensitivity between CAT and fixed formats. Using patients{\textquoteright} own report as an anchor-based measure of change, the CAT and fixed length formats were comparable in responsiveness to patient-reported change over a 3-month interval. CONCLUSIONS: Accurate estimates for functional activity group-level changes can be obtained from CAT administrations, with a considerable reduction in administration time.},
  keywords = {*Activities of Daily Living, *Adaptation, Physiological, *Computer Systems, *Questionnaires, Adult, Aged, Aged, 80 and over, Chi-Square Distribution, Factor Analysis, Statistical, Female, Humans, Longitudinal Studies, Male, Middle Aged, Outcome Assessment (Health Care)/*methods, Patient Discharge, Prospective Studies, Rehabilitation/*standards, Subacute Care/*standards},
  issn     = {0003-9993},
  author   = {Haley, S. M. and Siebens, H. and Coster, W. J. and Tao, W. and Black-Schaffer, R. M. and Gandek, B. and Sinclair, S. J. and Ni, P.}
}

@article{352,
  title    = {Computerized adaptive testing of diabetes impact: a feasibility study of {Hispanics} and {non-Hispanics} in an active clinic population},
  journal  = {Quality of Life Research},
  volume   = {15},
  number   = {9},
  year     = {2006},
  note     = {Schwartz, CarolynWelch, GarrySantiago-Kelley, PaulaBode, RitaSun, Xiaowu1 r43 dk066874-01/dk/niddkResearch Support, N.I.H., ExtramuralNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Nov;15(9):1503-18. Epub 2006 Sep 26.},
  month    = nov,
  pages    = {1503--1518},
  edition  = {2006/10/13},
  abstract = {BACKGROUND: Diabetes is a leading cause of death and disability in the US and is twice as common among Hispanic Americans as non-Hispanics. The societal costs of diabetes provide an impetus for developing tools that can improve patient care and delay or prevent diabetes complications. METHODS: We implemented a feasibility study of a Computerized Adaptive Test (CAT) to measure diabetes impact using a sample of 103 English- and 97 Spanish-speaking patients (mean age = 56.5, 66.5\% female) in a community medical center with a high proportion of minority patients (28\% African-American). The 37 items of the Diabetes Impact Survey were translated using forward-backward translation and cognitive debriefing. Participants were randomized to receive either the full-length tool or the Diabetes-CAT first, in the patient{\textquoteright}s native language. RESULTS: The number of items and the amount of time to complete the survey for the CAT was reduced to one-sixth the amount for the full-length tool in both languages, across disease severity. Confirmatory Factor Analysis confirmed that the Diabetes Impact Survey is unidimensional. The Diabetes-CAT demonstrated acceptable internal consistency reliability, construct validity, and discriminant validity in the overall sample, although subgroup analyses suggested that the English sample data evidenced higher levels of reliability and validity than the Spanish sample and issues with discriminant validity in the Spanish sample. Differential Item Function analysis revealed differences in responses tendencies by language group in 3 of the 37 items. Participant interviews suggested that the Spanish-speaking patients generally preferred the paper survey to the computer-assisted tool, and were twice as likely to experience difficulties understanding the items. CONCLUSIONS: While the Diabetes-CAT demonstrated clear advantages in reducing respondent burden as compared to the full-length tool, simplifying the item bank will be necessary for enhancing the feasibility of the Diabetes-CAT for use with low literacy patients.},
  keywords = {*Computers, *Hispanic Americans, *Quality of Life, Adult, Aged, Data Collection/*methods, Diabetes Mellitus/*psychology, Feasibility Studies, Female, Humans, Language, Male, Middle Aged},
  issn     = {0962-9343},
  author   = {Schwartz, C. and Welch, G. and Santiago-Kelley, P. and Bode, R. and Sun, X.}
}

@article{237,
  title    = {Factor analysis techniques for assessing sufficient unidimensionality of cancer related fatigue},
  journal  = {Quality of Life Research},
  volume   = {15},
  number   = {7},
  year     = {2006},
  note     = {0962-9343 (Print)Journal ArticleResearch Support, N.I.H., Extramural},
  month    = sep,
  pages    = {1179--1190},
  abstract = {BACKGROUND: Fatigue is the most common unrelieved symptom experienced by people with cancer. The purpose of this study was to examine whether cancer-related fatigue (CRF) can be summarized using a single score, that is, whether CRF is sufficiently unidimensional for measurement approaches that require or assume unidimensionality. We evaluated this question using factor analysis techniques including the theory-driven bi-factor model. METHODS: Five hundred and fifty five cancer patients from the Chicago metropolitan area completed a 72-item fatigue item bank, covering a range of fatigue-related concerns including intensity, frequency and interference with physical, mental, and social activities. Dimensionality was assessed using exploratory and confirmatory factor analysis (CFA) techniques. RESULTS: Exploratory factor analysis (EFA) techniques identified from 1 to 17 factors. The bi-factor model suggested that CRF was sufficiently unidimensional. CONCLUSIONS: CRF can be considered sufficiently unidimensional for applications that require unidimensionality. 
One such application, item response theory (IRT), will facilitate the development of short-form and computer-adaptive testing. This may further enable practical and accurate clinical assessment of CRF.},
  keywords = {*Factor Analysis, Statistical, *Quality of Life, Aged, Chicago, Fatigue/*etiology, Female, Humans, Male, Middle Aged, Neoplasms/*complications, Questionnaires},
  author   = {Lai, J-S. and Crane, P. K. and Cella, D.}
}

@article{311,
  title    = {Multidimensional computerized adaptive testing of the {EORTC} {QLQ-C30}: basic developments and evaluations},
  journal  = {Quality of Life Research},
  volume   = {15},
  number   = {3},
  year     = {2006},
  note     = {Petersen, Morten AaGroenvold, MogensAaronson, NeilFayers, PeterSprangers, MirjamBjorner, Jakob BEuropean Organisation for Research and Treatment of Cancer Quality of Life GroupResearch Support, Non-U.S. Gov{\textquoteright}tNetherlandsQuality of life research : an international journal of quality of life aspects of treatment, care and rehabilitationQual Life Res. 2006 Apr;15(3):315-29.},
  month    = apr,
  pages    = {315--329},
  edition  = {2006/03/21},
  abstract = {OBJECTIVE: Self-report questionnaires are widely used to measure health-related quality of life (HRQOL). Ideally, such questionnaires should be adapted to the individual patient and at the same time scores should be directly comparable across patients. This may be achieved using computerized adaptive testing (CAT). Usually, CAT is carried out for a single domain at a time. However, many HRQOL domains are highly correlated. Multidimensional CAT may utilize these correlations to improve measurement efficiency. We investigated the possible advantages and difficulties of multidimensional CAT. STUDY DESIGN AND SETTING: We evaluated multidimensional CAT of three scales from the EORTC QLQ-C30: the physical functioning, emotional functioning, and fatigue scales. Analyses utilised a database with 2958 European cancer patients. RESULTS: It was possible to obtain scores for the three domains with five to seven items administered using multidimensional CAT that were very close to the scores obtained using all 12 items and with no or little loss of measurement precision. CONCLUSION: The findings suggest that multidimensional CAT may significantly improve measurement precision and efficiency and encourage further research into multidimensional CAT. Particularly, the estimation of the model underlying the multidimensional CAT and the conceptual aspects need further investigations.},
  keywords = {*Quality of Life, *Self Disclosure, Adult, Female, Health Status, Humans, Male, Middle Aged, Questionnaires/*standards, User-Computer Interface},
  issn     = {0962-9343},
  author   = {Petersen, M. A. and Groenvold, M. and Aaronson, N. K. and Fayers, P. and Sprangers, M. and Bjorner, J. B.}
}

@article{384,
  title    = {Overview of quantitative measurement methods. Equivalence, invariance, and differential item functioning in health applications},
  journal  = {Medical Care},
  volume   = {44},
  number   = {11 Suppl 3},
  year     = {2006},
  note     = {Teresi, Jeanne AAG15294/AG/NIA NIH HHS/United StatesResearch Support, N.I.H., ExtramuralResearch Support, Non-U.S. Gov{\textquoteright}tReviewUnited StatesMedical careMed Care. 2006 Nov;44(11 Suppl 3):S39-49.},
  month    = nov,
  pages    = {S39--S49},
  edition  = {2006/10/25},
  abstract = {BACKGROUND: Reviewed in this article are issues relating to the study of invariance and differential item functioning (DIF). The aim of factor analyses and DIF, in the context of invariance testing, is the examination of group differences in item response conditional on an estimate of disability. Discussed are parameters and statistics that are not invariant and cannot be compared validly in crosscultural studies with varying distributions of disability in contrast to those that can be compared (if the model assumptions are met) because they are produced by models such as linear and nonlinear regression. 
OBJECTIVES: The purpose of this overview is to provide an integrated approach to the quantitative methods used in this special issue to examine measurement equivalence. The methods include classical test theory (CTT), factor analytic, and parametric and nonparametric approaches to DIF detection. Also included in the quantitative section is a discussion of item banking and computerized adaptive testing (CAT). METHODS: Factorial invariance and the articles discussing this topic are introduced. A brief overview of the DIF methods presented in the quantitative section of the special issue is provided together with a discussion of ways in which DIF analyses and examination of invariance using factor models may be complementary. CONCLUSIONS: Although factor analytic and DIF detection methods share features, they provide unique information and can be viewed as complementary in informing about measurement equivalence.},
  keywords = {*Cross-Cultural Comparison, Data Interpretation, Statistical, Factor Analysis, Statistical, Guidelines as Topic, Humans, Models, Statistical, Psychometrics/*methods, Statistics as Topic/*methods, Statistics, Nonparametric},
  issn     = {0025-7079},
  author   = {Teresi, J. A.}
}

@article{184,
  title    = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function},
  journal  = {Journal of Clinical Epidemiology},
  volume   = {59},
  number   = {3},
  year     = {2006},
  note     = {0895-4356 (Print)Journal ArticleValidation Studies},
  pages    = {290--298},
  abstract = {BACKGROUND AND OBJECTIVE: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items, develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (theta(IRT)) and measures generated using the simulated CAT (theta(CAT)). STUDY DESIGN AND SETTING: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients with shoulder impairments who completed 60 SFS items. RESULTS: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The theta(IRT) and theta(CAT) measures were highly correlated (r = .96) and resulted in similar classifications of patients. CONCLUSION: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good discriminating ability.},
  keywords = {*Computer Simulation, *Range of Motion, Articular, Activities of Daily Living, Adult, Aged, Aged, 80 and over, Factor Analysis, Statistical, Female, Humans, Male, Middle Aged, Prospective Studies, Reproducibility of Results, Research Support, N.I.H., Extramural, Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S., Shoulder Dislocation/*physiopathology/psychology/rehabilitation, Shoulder Pain/*physiopathology/psychology/rehabilitation, Shoulder/*physiopathology, Sickness Impact Profile, Treatment Outcome},
  author   = {Hart, D. L. and Cook, K. F. and Mioduski, J. E. and Teal, C. R. and Crane, P. K.}
}

@article {2074, title = {Simulated computerized adaptive test for patients with shoulder impairments was efficient and produced valid measures of function}, journal = {Journal of Clinical Epidemiology}, volume = {59}, year = {2006}, pages = {290-298}, abstract = {Background and Objective: To test unidimensionality and local independence of a set of shoulder functional status (SFS) items,
develop a computerized adaptive test (CAT) of the items using a rating scale item response theory model (RSM), and compare discriminant validity of measures generated using all items (theta(IRT)) and measures generated using the simulated CAT (theta(CAT)).
Study Design and Setting: We performed a secondary analysis of data collected prospectively during rehabilitation of 400 patients
with shoulder impairments who completed 60 SFS items.
Results: Factor analytic techniques supported that the 42 SFS items formed a unidimensional scale and were locally independent. Except for five items, which were deleted, the RSM fit the data well. The remaining 37 SFS items were used to generate the CAT. On average, 6 items were needed to estimate precise measures of function using the SFS CAT, compared with all 37 SFS items. The theta(IRT) and theta(CAT) measures were highly correlated (r = .96) and resulted in similar classifications of patients.
Conclusion: The simulated SFS CAT was efficient and produced precise, clinically relevant measures of functional status with good
discriminating ability.\