% Bibliography: item response theory (IRT) and computerized adaptive testing
% (CAT) in health-outcomes measurement. Six journal articles, 2000-2009.
%
% Conventions used in this file:
%   - One field per line; author first, long free-text fields last.
%   - Citation keys are numeric and are kept exactly as they were so that
%     existing citations keep resolving.
%   - Page ranges use a double hyphen with the full end page.
%   - month uses the predefined three-letter macros, unbraced.
%   - Name suffixes follow the "Last, Jr., First" form that BibTeX parses.
%
% NOTE(review): the note values are kept verbatim. They look like raw
% database-export residue (names, grant numbers and citation strings run
% together) and will be printed as-is by any style that prints note; confirm
% whether they should be moved to a non-printing field.
% NOTE(review): edition holds a yyyy/mm/dd string in three entries, presumably
% an electronic-publication date rather than an edition; left untouched,
% confirm against the source records.

@article{143,
  author   = {Fries, J. F. and Cella, D. and Rose, M. and Krishnan, E. and Bruce, B.},
  title    = {Progress in assessing physical function in arthritis: {PROMIS} short forms and computerized adaptive testing},
  journal  = {Journal of Rheumatology},
  volume   = {36},
  number   = {9},
  year     = {2009},
  month    = sep,
  pages    = {2061--2066},
  edition  = {2009/09/10},
  issn     = {0315-162X},
  note     = {Fries, James FCella, DavidRose, MatthiasKrishnan, EswarBruce, BonnieU01 AR052158/AR/NIAMS NIH HHS/United StatesU01 AR52177/AR/NIAMS NIH HHS/United StatesConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 2009 Sep;36(9):2061-6.},
  keywords = {*Disability Evaluation, *Outcome Assessment (Health Care), Arthritis/diagnosis/*physiopathology, Health Surveys, Humans, Prognosis, Reproducibility of Results},
  abstract = {OBJECTIVE: Assessing self-reported physical function/disability with the Health Assessment Questionnaire Disability Index (HAQ) and other instruments has become central in arthritis research. Item response theory (IRT) and computerized adaptive testing (CAT) techniques can increase reliability and statistical power. IRT-based instruments can improve measurement precision substantially over a wider range of disease severity. These modern methods were applied and the magnitude of improvement was estimated. METHODS: A 199-item physical function/disability item bank was developed by distilling 1865 items to 124, including Legacy Health Assessment Questionnaire (HAQ) and Physical Function-10 items, and improving precision through qualitative and quantitative evaluation in over 21,000 subjects, which included about 1500 patients with rheumatoid arthritis and osteoarthritis. Four new instruments, (A) Patient-Reported Outcomes Measurement Information (PROMIS) HAQ, which evolved from the original (Legacy) HAQ; (B) "best" PROMIS 10; (C) 20-item static (short) forms; and (D) simulated PROMIS CAT, which sequentially selected the most informative item, were compared with the HAQ. RESULTS: Online and mailed administration modes yielded similar item and domain scores. The HAQ and PROMIS HAQ 20-item scales yielded greater information content versus other scales in patients with more severe disease. The "best" PROMIS 20-item scale outperformed the other 20-item static forms over a broad range of 4 standard deviations. The 10-item simulated PROMIS CAT outperformed all other forms. CONCLUSION: Improved items and instruments yielded better information. The PROMIS HAQ is currently available and considered validated. The new PROMIS short forms, after validation, are likely to represent further improvement. CAT-based physical function/disability assessment offers superior performance over static forms of equal length.},
}

@article{52,
  author   = {Chakravarty, E. F. and Bjorner, J. B. and Fries, J. F.},
  title    = {Improving patient reported outcomes using item response theory and computerized adaptive testing},
  journal  = {Journal of Rheumatology},
  volume   = {34},
  number   = {6},
  year     = {2007},
  month    = jun,
  pages    = {1426--1431},
  edition  = {2007/06/07},
  issn     = {0315-162X},
  note     = {Chakravarty, Eliza FBjorner, Jakob BFries, James FAr052158/ar/niamsConsensus Development ConferenceResearch Support, N.I.H., ExtramuralCanadaThe Journal of rheumatologyJ Rheumatol. 2007 Jun;34(6):1426-31.},
  keywords = {*Rheumatic Diseases/physiopathology/psychology, Clinical Trials, Data Interpretation, Statistical, Disability Evaluation, Health Surveys, Humans, International Cooperation, Outcome Assessment (Health Care)/*methods, Patient Participation/*methods, Research Design/*trends, Software},
  abstract = {OBJECTIVE: Patient reported outcomes (PRO) are considered central outcome measures for both clinical trials and observational studies in rheumatology. More sophisticated statistical models, including item response theory (IRT) and computerized adaptive testing (CAT), will enable critical evaluation and reconstruction of currently utilized PRO instruments to improve measurement precision while reducing item burden on the individual patient. METHODS: We developed a domain hierarchy encompassing the latent trait of physical function/disability from the more general to most specific. Items collected from 165 English-language instruments were evaluated by a structured process including trained raters, modified Delphi expert consensus, and then patient evaluation. Each item in the refined data bank will undergo extensive analysis using IRT to evaluate response functions and measurement precision. CAT will allow for real-time questionnaires of potentially smaller numbers of questions tailored directly to each individual{\textquoteright}s level of physical function. RESULTS: Physical function/disability domain comprises 4 subdomains: upper extremity, trunk, lower extremity, and complex activities. Expert and patient review led to consensus favoring use of present-tense "capability" questions using a 4- or 5-item Likert response construct over past-tense "performance" items. Floor and ceiling effects, attribution of disability, and standardization of response categories were also addressed. CONCLUSION: By applying statistical techniques of IRT through use of CAT, existing PRO instruments may be improved to reduce questionnaire burden on the individual patients while increasing measurement precision that may ultimately lead to reduced sample size requirements for costly clinical trials.},
}

@article{150,
  author   = {Gershon, R. C.},
  title    = {Computer adaptive testing},
  journal  = {Journal of Applied Measurement},
  volume   = {6},
  number   = {1},
  year     = {2005},
  pages    = {109--127},
  edition  = {2005/02/11},
  issn     = {1529-7713},
  note     = {Gershon, Richard CReviewUnited StatesJournal of applied measurementJ Appl Meas. 2005;6(1):109-27.},
  keywords = {*Internet, *Models, Statistical, *User-Computer Interface, Certification, Health Surveys, Humans, Licensure, Microcomputers, Quality of Life},
  abstract = {The creation of item response theory (IRT) and Rasch models, inexpensive accessibility to high speed desktop computers, and the growth of the Internet, has led to the creation and growth of computerized adaptive testing or CAT. This form of assessment is applicable for both high stakes tests such as certification or licensure exams, as well as health related quality of life surveys. This article discusses the historical background of CAT including its many advantages over conventional (typically paper and pencil) alternatives. The process of CAT is then described including descriptions of the specific differences of using CAT based upon 1-, 2- and 3-parameter IRT and various Rasch models. Numerous specific topics describing CAT in practice are described including: initial item selection, content balancing, test difficulty, test length and stopping rules. The article concludes with the author{\textquoteright}s reflections regarding the future of CAT.},
}

@article{30,
  author   = {Bjorner, J. B. and Kosinski, M. and Ware, Jr., J. E.},
  title    = {Calibration of an item pool for assessing the burden of headaches: an application of item response theory to the {Headache Impact Test} ({HIT})},
  journal  = {Quality of Life Research},
  volume   = {12},
  number   = {8},
  year     = {2003},
  pages    = {913--933},
  note     = {0962-9343Journal Article},
  keywords = {*Cost of Illness, *Decision Support Techniques, *Sickness Impact Profile, Adolescent, Adult, Aged, Comparative Study, Disability Evaluation, Factor Analysis, Statistical, Headache/*psychology, Health Surveys, Human, Longitudinal Studies, Middle Aged, Migraine/psychology, Models, Psychological, Psychometrics/*methods, Quality of Life/*psychology, Software, Support, Non-U.S. Gov{\textquoteright}t},
  abstract = {BACKGROUND: Measurement of headache impact is important in clinical trials, case detection, and the clinical monitoring of patients. Computerized adaptive testing (CAT) of headache impact has potential advantages over traditional fixed-length tests in terms of precision, relevance, real-time quality control and flexibility. OBJECTIVE: To develop an item pool that can be used for a computerized adaptive test of headache impact. METHODS: We analyzed responses to four well-known tests of headache impact from a population-based sample of recent headache sufferers (n = 1016). We used confirmatory factor analysis for categorical data and analyses based on item response theory (IRT). RESULTS: In factor analyses, we found very high correlations between the factors hypothesized by the original test constructers, both within and between the original questionnaires. These results suggest that a single score of headache impact is sufficient. We established a pool of 47 items which fitted the generalized partial credit IRT model. By simulating a computerized adaptive health test we showed that an adaptive test of only five items had a very high concordance with the score based on all items and that different worst-case item selection scenarios did not lead to bias. CONCLUSION: We have established a headache impact item pool that can be used in CAT of headache impact.},
}

@article{187,
  author   = {Hart, D. L. and Wright, B. D.},
  title    = {Development of an index of physical functional health status in rehabilitation},
  journal  = {Archives of Physical Medicine and Rehabilitation},
  volume   = {83},
  number   = {5},
  year     = {2002},
  month    = may,
  pages    = {655--665},
  note     = {0003-9993 (Print)Journal Article},
  keywords = {*Health Status Indicators, *Rehabilitation Centers, Adolescent, Adult, Aged, Aged, 80 and over, Female, Health Surveys, Humans, Male, Middle Aged, Musculoskeletal Diseases/*physiopathology/*rehabilitation, Nervous System Diseases/*physiopathology/*rehabilitation, Physical Fitness/*physiology, Recovery of Function/physiology, Reproducibility of Results, Retrospective Studies},
  abstract = {OBJECTIVE: To describe (1) the development of an index of physical functional health status (FHS) and (2) its hierarchical structure, unidimensionality, reproducibility of item calibrations, and practical application. DESIGN: Rasch analysis of existing data sets. SETTING: A total of 715 acute, orthopedic outpatient centers and 62 long-term care facilities in 41 states participating with Focus On Therapeutic Outcomes, Inc. PATIENTS: A convenience sample of 92,343 patients (40\% male; mean age +/- standard deviation [SD], 48+/-17y; range, 14-99y) seeking rehabilitation between 1993 and 1999. INTERVENTIONS: Not applicable. MAIN OUTCOME MEASURES: Patients completed self-report health status surveys at admission and discharge. The Medical Outcomes Study 36-Item Short-Form Health Survey{\textquoteright}s physical functioning scale (PF-10) is the foundation of the physical FHS. The Oswestry Low Back Pain Disability Questionnaire, Neck Disability Index, Lysholm Knee Questionnaire, items pertinent to patients with upper-extremity impairments, and items pertinent to patients with more involved neuromusculoskeletal impairments were cocalibrated into the PF-10. RESULTS: The final FHS item bank contained 36 items (patient separation, 2.3; root mean square measurement error, 5.9; mean square +/- SD infit, 0.9+/-0.5; outfit, 0.9+/-0.9). Analyses supported empirical item hierarchy, unidimensionality, reproducibility of item calibrations, and content and construct validity of the FHS-36. CONCLUSIONS: Results support the reliability and validity of FHS-36 measures in the present sample. Analyses show the potential for a dynamic, computer-controlled, adaptive survey for FHS assessment applicable for group analysis and clinical decision making for individual patients.},
}

@article{191,
  author   = {Hays, R. D. and Morales, L. S. and Reise, S. P.},
  title    = {Item response theory and health outcomes measurement in the 21st century},
  journal  = {Medical Care},
  volume   = {38},
  number   = {9 Suppl II},
  year     = {2000},
  pages    = {II28--II42},
  note     = {204349670025-7079Journal Article},
  keywords = {*Models, Statistical, Activities of Daily Living, Data Interpretation, Statistical, Health Services Research/*methods, Health Surveys, Human, Mathematical Computing, Outcome Assessment (Health Care)/*methods, Research Design, Support, Non-U.S. Gov{\textquoteright}t, Support, U.S. Gov{\textquoteright}t, P.H.S., United States},
  abstract = {Item response theory (IRT) has a number of potential advantages over classical test theory in assessing self-reported health outcomes. IRT models yield invariant item and latent trait estimates (within a linear transformation), standard errors conditional on trait level, and trait estimates anchored to item content. IRT also facilitates evaluation of differential item functioning, inclusion of items with different response formats in the same scale, and assessment of person fit and is ideally suited for implementing computer adaptive testing. Finally, IRT methods can be helpful in developing better health outcome measures and in assessing change over time. These issues are reviewed, along with a discussion of some of the methodological and practical challenges in applying IRT methods.},
}