-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathicpsr_study_schema.json
More file actions
934 lines (932 loc) · 55.9 KB
/
icpsr_study_schema.json
File metadata and controls
934 lines (932 loc) · 55.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
{
"$schema": "https://json-schema.org/draft-07/schema#",
"$id": "https://schemas.icpsr.umich.edu/schema/icpsr_study_schema?version=v1",
"title": "ICPSR Metadata Schema",
"description": "This is the metadata schema used to describe data collections at the Inter-university Consortium for Political and Social Research (ICPSR). These rules and definitions represent ICPSR's metadata practices and are intended to (a) assist ICPSR staff with metadata entry, and (b) help ICPSR users -- including data depositors and researchers accessing data -- understand how to use and interpret our metadata.",
"type": "object",
"required": [ "title", "principal_investigator", "version", "version_date", "distributor", "summary", "subject_term", "time_period", "geographic_coverage_area", "study_number" ],
"additionalProperties": false,
"properties": {
"version": {
"description": "The current version number for the data collection.",
"type": "integer",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/version?version=v1",
"examples": [ 1, 2 ]
},
"version_date": {
"description": "The date on which the current version of the data collection was released by ICPSR.",
"type": "string",
"format": "date",
"controlledVocab": "N/A",
"usageNotes": "ICPSR automatically generates this date for data collection additions and updates. For metadata-only updates, the date remains unchanged.",
"examples": [ "2006-03-30", "2019-05-05" ]
},
"original_release_date": {
"description": "The date on which the data collection was originally released by ICPSR.",
"type": "string",
"format": "date",
"controlledVocab": "N/A",
"usageNotes": "ICPSR automatically generates the release date.",
"examples": [ "2001-02-07", "2020-08-12" ]
},
"title": {
"description": "The official title that describes what the data collection is about, its geographic scope, and the time period it covered.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/title?version=v1",
"examples": [
"Bridge of Faith: Aim4Peace Community-Based Violence Prevention Project, Kansas City, Missouri, 2014-2017",
"Health and Relationships Project, United States, 2014-2015",
"Targeted Interventions to Prevent Chronic Low Back Pain in High Risk Patients: A Multi-Site Pragmatic Randomized Controlled Trial (TARGET Trial), 4 U.S. cities, 2016-2019",
"Aid Like A Paycheck (ALAP), Texas and California, 2014-2017",
"COVID-19 Disruptions Disproportionately Affect Female Academics, Global, 2020"
]
},
"alternate_title": {
"description": "The alternate name(s) or acronym(s) commonly used to refer to the data collection.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "N/A",
"usageNotes": "Alternate Title often takes the form of a shortened (by abbreviation or acronym) version of the official title.",
"examples": [
["Add Health Parent Study"],
["FACES 2009"],
["Surveys of Consumers"],
["Eurobarometer 85.2"]
]
},
"link_title": {
"description": "The title of an external resource that is included in the ICPSR catalog as a courtesy to users.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Always appears with the Link URL.",
"examples": [ "Cebu Longitudinal Health and Nutrition Survey" ]
},
"link_url": {
"description": "The URL of an external resource that is included in the ICPSR catalog as a courtesy to users.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Always appears with the Link Title.",
"examples": [ "https://cebu.cpc.unc.edu/" ]
},
"principal_investigator": {
"description": "The key people or organizations responsible for the data collection, listed by importance. Each data collection requires at least one PI, either a person or an organization.",
"type": "array",
"controlledVocab": "The [ICPSR Personal Names Authority List](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10002) and [Organization Names Authority List](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10004) are the primary authority control sources for PI names. The [Virtual International Authority File](https://viaf.org/) (VIAF) serves as a secondary resource if names are not present in ICPSR lists.",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/pi_names?version=v1",
"items": {
"type": "object",
"properties": {
"person": {
"description": "The name of a person primarily responsible for the data collection.",
"type": "object",
"properties": {
"given_name": {
"description": "The person's given name.",
"type": "string"
},
"family_name": {
"description": "The person's family name (e.g., surname).",
"type": "string"
}
},
"required": ["given_name", "family_name"],
"additionalProperties": false,
"examples": [
{
"given_name": "James A.",
"family_name": "McCann"
},
{
"given_name": "Warren",
"family_name": "Winkelstein Jr."
},
{
"given_name": "E.V.",
"family_name": "Oppenhuis"
},
{
"given_name": "Miner P.",
"family_name": "Marchbanks III"
}
]
},
"organization": {
"description": "The name of the organization primarily responsible for the data collection OR the organization with which an individual PI was affiliated at the time of a data collection's deposit at ICPSR.",
"type": "string",
"examples": [
"University of Michigan",
"Harvard University. Medical School",
"University of California, Irvine",
"United States Department of Health and Human Services. Centers for Disease Control and Prevention. Office of Minority Health and Health Disparities"
]
},
"order": {
"description": "The order or rank of importance for the PIs associated with the data collection, typically provided to ICPSR by the lead PI.",
"type": "integer",
"controlledVocab": "N/A",
"usageNotes": "A value of '1' indicates the primary PI, '2' the second, and so forth.",
"examples": [1,2,3]
}
},
"oneOf": [
{ "required": ["person"] },
{ "required": ["organization"] }
],
"required": ["order"],
"additionalProperties": false
},
"minItems": 1,
"examples": [
[
{
"person": {
"given_name": "Jane",
"family_name": "Doe"
},
"organization": "Urban Institute",
"order": 1
},
{
"person": {
"given_name": "John Q.",
"family_name": "Public"
},
"organization": "Harvard University. Medical School",
"order": 2
}
],
[
{
"organization": "Urban Institute",
"order": 1
}
]
]
},
"citation": {
"description": "The official way to reference the data collection in writing.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/citation?version=v1",
"examples": [
["University of Michigan. Survey Research Center. Economic Behavior Program. Survey of Consumer Attitudes and Behavior, September 2018. Inter-university Consortium for Political and Social Research [distributor], 2021-11-18. https://doi.org/10.3886/ICPSR38121.v1"],
["Goldin, Claudia, and Lawrence Katz. The 1915 Iowa State Census Project. ICPSR28501-v1. Ann Arbor, MI: Inter-university Consortium for Political and Social Research [distributor], 2010-12-14. http://doi.org/10.3886/ICPSR28501.v1"]
]
},
"distributor": {
"description": "The organization(s) responsible for distributing the data collection.",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "The name of the data distributor.",
"type": "string",
"controlledVocab": "[ICPSR Organization Names Authority List](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10004)",
"examples": [
"Inter-university Consortium for Political and Social Research",
"Roper Center for Public Opinion Research"
]
},
"location": {
"description": "The location of the data distributor.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Include the city and state (or country) where the distributor is located.",
"examples": ["Ann Arbor, MI", "Chicago, IL"]
},
"order": {
"description": "The order of importance for the distributors of the data collection.",
"type": "integer",
"controlledVocab": "N/A",
"usageNotes": "A value of '1' indicates the primary distributor, '2' the second, and so forth.",
"examples": [1,2,3]
}
},
"required": ["name", "location", "order"],
"additionalProperties": false
},
"minItems": 1,
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/distributor?version=v1",
"examples": [
[
{
"name": "Inter-university Consortium for Political and Social Research",
"location": "Ann Arbor, MI",
"order": 1
}
],
[
{
"name": "Inter-university Consortium for Political and Social Research",
"location": "Ann Arbor, MI",
"order": 1
},
{
"name": "Roper Center for Public Opinion Research",
"location": "Princeton, NJ",
"order": 2
}
]
]
},
"study_number": {
"description": "A unique, numerical value used by ICPSR to identify and track data collections.",
"type": "integer",
"controlledVocab": "N/A",
"usageNotes": "The study number is automatically generated by ICPSR and is unique. Current study numbers are five digits, though four digit numbers were once standard and are still acceptable.",
"examples": [2760, 3025, 38672]
},
"doi": {
"description": "The registered persistent digital object identifier (DOI) associated with the data collection.",
"type": "string",
"format": "uri",
"controlledVocab": "N/A",
"usageNotes": "The DOI (digital object identifier) is a persistent identifier provided by DataCite, a DOI registration agency. The DOI name is divided into three parts, separated by slashes ('/'): 'https://doi.org' is the HTTP URL link; followed by '10.3886', a globally unique number that identifies ICPSR as the registrant within the DOI namespace; followed by 'ICPSR', the ICPSR study number, and then the version number. The study number is automatically generated by ICPSR and is unique. Current study numbers are five digits, though four digit numbers were once standard and are still acceptable. Studies with fewer than five digits will have zeroes prepended in the DOI (e.g., '10.3886/ICPSR02760).",
"examples": [ "https://doi.org/10.3886/ICPSR03025.v2", "https://doi.org/10.3886/ICPSR06425.v1" ]
},
"funding_source": {
"description": "The sources of funding that supported the data collection.",
"type": "array",
"items": {
"type": "object",
"properties": {
"agency": {
"description": "An organization that supported the data collection.",
"type": "string",
"controlledVocab": "The [ICPSR Organization Names Authority List](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10004) is the primary authority control source for funding agencies. The [Virtual International Authority File](https://viaf.org/) (VIAF) serves as a secondary resource if names are not present in the ICPSR list.",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/funding_agency?version=v1",
"icpsrGuidance": "The Principal Investigator's home institution does not need to be listed as a funding agency unless the PI provides a grant number (or other award information) or makes a specific request.",
"examples": [
"United States Department of Justice. Office of Justice Programs. Bureau of Justice Statistics",
"Institute of Museum and Library Services",
"Robert Wood Johnson Foundation"
]
},
"grant_number": {
"description": "A unique identifier associated with the funding.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "N/A",
"usageNotes": "Internal blanks in the Grant Number are replaced with hyphens. Multiple grants from the same funding agency are separated by a comma.",
"examples": [
["SES-1835721"],
["MDR-8550085", "MDR-8550204"],
["40791"]
]
},
"purpose": {
"description": "The purpose of the funding.",
"type": "array",
"items": {
"type": "string",
"enum": ["collection and/or analysis of data", "secondary analysis of data", "archiving of data"]
},
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/funding_purpose?version=v1",
"icpsrGuidance": "This is an internal ICPSR element that is not publicly displayed. Certain ICPSR topical archives find this useful as they assemble reports for their funding agencies.",
"examples": [
["collection and/or analysis of data", "secondary analysis of data"],
["archiving of data"]
]
},
"order": {
"description": "The relative order of funding sources associated with the data collection.",
"type": "integer",
"controlledVocab": "N/A",
"usageNotes": "A value of '1' indicates the primary funder, '2' the second, and so forth.",
"examples": [1,2,3]
}
},
"required": ["agency", "order"],
"dependentRequired": { "grant_numbers": ["agency"] },
"additionalProperties": false
},
"examples": [
[
{
"agency": "Robert Wood Johnson Foundation",
"grant_numbers": ["MDR-8550085", "MDR-8550204"],
"purpose": ["collection and/or analysis of data"],
"order": 1
},
{
"agency": "United States Department of Justice. Office of Justice Programs. Bureau of Justice Statistics",
"grant_numbers": ["SES-1835721"],
"order": 2
}
],
[
{
"agency": "Institute of Museum and Library Services",
"order": 1
}
]
]
},
"external_source_ID": {
"description": "A unique identifier supplied by the data depositor.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "N/A",
"icpsrGuidance": "This is an internal ICPSR element that is not publicly displayed. An External Source ID consists of: an ICPSR-defined source organization code, a colon, and a Depositor-supplied ID.",
"examples": [
["BJS:271"],
["PSC:12345"]
]
},
"summary": {
"description": "A description of the data collection that helps users understand its purpose, substance, and key topics.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/summary?version=v1",
"examples": [
"In 2014, Chicago Public Schools, looking to reduce the possibility of gun violence among school-aged youth, applied for a grant through the National Institute of Justice. CPS was awarded the Comprehensive School Safety Initiative grant and use said grant to establish the 'Connect and Redirect to Respect' program. This program used student social media data to identify and intervene with students thought to be at higher risk for committing violence. At-risk behaviors included brandishing a weapon, instigating conflict online, signaling gang involvement, and threats towards others. Identified at-risk students would be contacted by a member of the CPS Network Safety Team or the Chicago Police Department's Gang School Safety Team, depending on the risk level of the behavior. To evaluate the efficacy of CRR, the University of Chicago Crime Lab compared outcomes for students enrolled in schools that received the program to outcomes for students enrolled in comparison schools, which did not receive the program. 32 schools were selected for the study, with a total of 44,503 students. Demographic variables included age, race, sex, and ethnicity. Misconduct and academic variables included arrest history, in-school suspensions, out-of-school suspensions, GPA, and attendance days.",
"The Health and Relationship Project is a study of both spouses in same-sex and different-sex marriages who were legally married and aged 35 to 65 at the time of data collection (2015). There are two parts of this study: a baseline questionnaire and a daily diary questionnaire completed for 10 consecutive days; both components were completed online and spouses were asked to complete the surveys separately. The baseline questionnaire asks participants about a number of topics related to marriage and health, including stress, health status and health behaviors, relationship quality, and how they have approached health problems in the past. The diary questionnaire asks participants a number of questions about the past 24 hours, including daily stress experiences, social interactions, and health behaviors."
]
},
"subject_term": {
"description": "A controlled list of social science terms maintained by ICPSR and used to indicate topics related to the data collection.",
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"controlledVocab": "The [ICPSR Subject Thesaurus](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10001) and the [ICPSR Personal Names Authority List](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10002) are preferred sources.",
"usageNotes": "The [Library of Congress Subject Terms](https://authorities.loc.gov) and the [European Language Social Science Thesaurus](https://elsst.cessda.eu/) are referenced when adding new terms to the ICPSR Subject Thesaurus.",
"icpsrGuidance": "Non-thesaurus terms can be submitted in the metadata editor and will be reviewed by the metadata librarian. If an ICPSR staff member submits a non-thesaurus term, the metadata librarian will gauge the necessity of this term, check it against the Library of Congress Subject Headings and European Language Social Science Thesaurus to see if a different related term should be used, and consider it for addition to the ICPSR thesaurus.",
"examples": [
["child care"],
["Social Security", "crime", "victimization"],
["COVID-19", "Biden, Joe"]
]
},
"geographic_coverage_area": {
"description": "The geographic locations where the data refer or are related.",
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"controlledVocab": "[ICPSR Geographic Names Thesaurus](https://www.icpsr.umich.edu/web/ICPSR/thesaurus/10003).",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/geographic_coverage_area?version=v1",
"examples": [
["United States", "Maryland", "Baltimore"],
["United Kingdom", "China"],
["Canada", "Alberta"]
]
},
"time_period": {
"description": "The time period(s) to which the data refer, regardless of when the data were collected.",
"type": "array",
"items": {
"type": "object",
"properties": {
"date": {
"description": "The date (or date range) for a time period to which the data refer.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/time_period_date?version=v1",
"examples": [
"2020",
"2021--2022",
"2006-03--2006-04",
"2020-01-21--2021-01-21"
]
},
"time_frame": {
"description": "An optional free-text description of the time period, used for non-numeric dates (e.g., 'Fall 2012') or to add context when multiple dates are present.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/time_period_time_frame?version=v1",
"examples": [
"Wave 1",
"Spring 2013",
"Post-Survey Follow-Up"
]
}
},
"required": ["date"],
"additionalProperties": false
},
"minItems": 1,
"examples": [
[
{
"date": "2020-01-21--2020-06-21",
"time_frame": "Wave 1"
},
{
"date": "2022-01--2023-01",
"time_frame": "Wave 2"
}
],
[
{ "date": "2020"}
]
]
},
"collection_date": {
"description": "The date(s) when the data were physically collected.",
"type": "array",
"items": {
"type": "object",
"properties": {
"date": {
"description": "The date (or date range) of the data collection period.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Dates are formatted in accordance with ISO 8601 (YYYY, YYYY-MM, or YYYY-MM-DD). One hyphen separates the parts of a date; two hyphens separate two dates. Ranges may be expressed in years (YYYY--YYYY), months (YYYY-MM--YYYY-MM), or days (YYYY-MM-DD--YYYY-MM-DD). No spaces are permitted in date expressions.",
"examples": [
"2020",
"2021--2022",
"2006-03--2006-04",
"2020-01-21--2021-01-21"
]
},
"time_frame": {
"description": "An optional free-text description of the data collection period, used for non-numeric dates (e.g., 'Fall 2012') or to add context when multiple dates are present.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/collection_date_time_frame?version=v1",
"examples": [
"Wave 1",
"Spring 2013",
"Post-Survey Follow-Up"
]
}
},
"required": ["date"],
"additionalProperties": false
},
"minItems": 1,
"examples": [
[
{
"date": "2020-01-21--2020-06-21",
"time_frame": "Wave 1"
},
{
"date": "2022-01--2023-01",
"time_frame": "Wave 2"
}
],
[
{ "date": "2020"}
]
]
},
"universe": {
"description": "The total group of persons or other entities (e.g., households or organizations) that were the object of research and to which analytic results refer.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Age, nationality, and residence commonly help to delineate a given universe, but any of a number of factors may be involved, such as sex, race, income, veteran status, criminal convictions, etc. The Universe may consist of elements other than persons, such as housing units, court cases, deaths, countries, etc. It should be possible to tell from the description of the universe whether a given individual or element (hypothetical or real) is a member of the population under study. Typically, the Universe statement is about one sentence or shorter, and reflects the entire possible population a data collection sought to study.",
"icpsrGuidance": "Universe is distinct from Sampling as it describes the population the study seeks to analyze, while Sampling describes how the researchers selected participants among that population. Universe should not contain information about methodology, only general information about the target population.",
"examples": [
"All households in the United States with phones.",
"Part 1: Thirty cities in Massachusetts during 1980-1986. Parts 2-4: All residents in Massachusetts during 1986.",
"Individuals self-identified as transgender, trans, genderqueer, non-binary, or other identities on the transgender identity spectrum aged 18 and older residing in the fifty U.S. states, the District of Columbia, American Samoa, Guam, Puerto Rico, and U.S. military bases overseas.",
"Jihadists from the United States and Canada, along with Incels from Germany, Canada, the United States, and United Kingdom.",
"All publicly funded medical examiner and coroner offices.",
"Uncertified ballots for the 2000 United States presidential election in Florida."
]
},
"data_type": {
"description": "The types of data included in the data collection.",
"type": "array",
"items": {
"type": "string",
"enum": [
"administrative records data",
"aggregate data",
"audio: sound data",
"census/enumeration data",
"clinical data",
"event/transaction data",
"experimental data",
"geographic information system (GIS) data",
"images: photographs, drawings, graphical representations",
"medical records",
"observational data",
"program source code",
"roll call voting data",
"survey data",
"text",
"video: film, animation, etc."
]
},
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/data_type?version=v1",
"examples": [
["administrative records data"],
["census/enumeration data", "survey data", "video: film, animation, etc."]
]
},
"collection_note": {
"description": "Important details about the data collection (like unique authoring, discrepencies, or processing information) that can't be recorded in other metadata elements.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "N/A",
"usageNotes": "Collection Notes should include any information that does not fit anywhere else in the metadata, such as: information about unique aspects of the way the data was processed, discrepancies between the metadata and documentation files, information about the research team, or series-specific notes.",
"examples": [
[
"Exchange rates are expressed in United States dollars per national currency unit or vice versa, and two rates are given for the special drawing right (SDR) value of the national currency unit."
],
[
"Percentage distributions provided in the codebook were generated using full weights, which are not available on the public use files. Therefore, these results cannot be replicated using the public use files. The differences between results produced using the full weights and those produced using the sampling weights available on the public use files are estimated to be below 1 percent."
],
[
"Information on the Index of Consumer Sentiment, the Index of Current Economic Conditions, and the Index of Consumer Expectations and how they were created can be found in the P.I. Codebook.",
"Additional information on the Survey of Consumers can be found by visiting the Survey of Consumers Website."
],
[
"At PI request, dataset 1 should be attributed to Anura P. Jayasumana while datasets 2-6 should be attributed to Jytte Klausen. Please refer to the PI user guide for additional information."
]
]
},
"study_purpose": {
"description": "The study's main goals and associated research questions.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "The Study Purpose provides more specific information than the Summary element, including the data collection's objectives, intended achievements, and lists of questions it sought to answer. This element can also include historical or background information about the research project. As with the Summary, the text should be written in third person and avoid any commentary on the data collection's outcomes.",
"examples": [
"The purpose of this study is to advance understanding of the barriers and enablers associated with colorectal cancer (CRC) screening among Somali men ages 50-74 in Minnesota.",
"The purpose of the study's qualitative phase is to explore veterans' experiences by identifying factors that they believe caused or contributed to their contact with the criminal justice system.",
"The purpose of the study was to explore the impact of interventions by Sexual Assault Nurse Examiners/Sexual Assault Response Teams (SANE/SART) on the judicial process. The goal of this study was to test the efficacy of SANE/SART programs as a tool in the criminal justice system. The American Prosecutors Research Institute and Boston College tested the hypotheses that SANE/SART exams increase arrest and prosecution rates. In testing this hypothesis, the project team sought to answer five primary research questions: (1) Is the arrest rate higher in cases where a SANE/SART exam is performed as compared with cases in which no exam is performed?, (2) Is the indictment/charging rate higher in such cases?, (3) Are guilty pleas more likely to be entered in such cases, and are pleas likely to be to the existing charge or to a lesser charge?, (4) Is the conviction rate higher in such cases?, and (5) Is the sentence more severe in such cases? In addition, the project team examined the participation of victims in the criminal justice process and the types of services that were offered them. As a large portion of SANE/SART programs focus on understanding victims' reactions to sexual assault and ensuring proper treatment to minimize the chance of further trauma, a central hypothesis to be tested was that improved case outcomes may be a result of increased participation by the victim in the identification, apprehension, and prosecution of the perpetrator. Moreover, the level of services offered and provided to victims, particularly those related to prosecution would likely affect case outcomes as well. Both the victim's participation in the criminal justice system and specifics of SANE/SART services, including evidence collection, were considered in determining the true impact of SANE/SART interventions on case outcomes."
]
},
"study_design": {
"description": "The procedures used to contact participants and gather data.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/study_design?version=v1",
"examples": [
"Data on organizational culture in each of the 12 courts (Part 1) were obtained by administering the Court Culture Assessment Instrument (CCAI) to all judges with a felony criminal court docket and to all senior court administrators. A total of 224 respondents completed the questionnaire. The CCAI was used to assess five key dimensions of current court culture orientation: (1) dominant case management style, (2) judicial and court staff relations, (3) change management, (4) courthouse leadership, and (5) internal organization. The determination of what culture judges and court administrators desired to establish in the near future was also obtained through the application of the same instrument (CACI) as practitioners were asked to indicate the type of culture in each work area (or content dimension) they would like to see in their court in the next five years. Additionally, surveys were conducted of prosecuting attorneys (Part 2) and public defender attorneys (Part 3) to gauge their views on how well the courts in which they practice achieve the goals of access, fairness, and managerial effectiveness. Every prosecutor and public defender with two years or more experience in representing the state or criminal defendants in felony cases was asked to complete a questionnaire probing their thoughts on how well their court acted to promote access to records through availability and staff cooperation, treating litigants, witnesses, jurors and others fairly, and demonstrating concern for the rights and interests of others in the criminal trial process, including attorney and victims. A total of 334 prosecuting attorneys and 260 public defense attorneys completed the 46-item trial court process survey."
]
},
"variable_description": {
"description": "Significant variables (particularly demographic variables) in the data files.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "The Variable Description provides more detailed information than the Summary, including a review of variables that are important for users to know about. The codebook, setup files, and variable groups are appropriate sources of information for Variable Description.",
"examples": [
"The data includes variables about participants' and their parents' moods, interviewer observations, families' activities, families' health history, participants' school records, and parents' substance use. Demographic variables include race, religion, annual household income, and the participants' parents' employment statuses.",
"The LGBTQ Hate Crimes Interviews dataset contains more in-depth information, including victim demographic information, substance abuse history, information on whether the victim is open about their LGBTQ identification, the victim's job status, and information about how the victim reacted to the crime, such as whether or not they reported the crime to the police and their level of cooperation with the police and prosecution."
]
},
"sampling": {
  "description": "The methods used to select the subset of the population that data are to be collected from (e.g., simple, systematic, stratified).",
  "type": "string",
  "controlledVocab": "N/A",
  "usageNotes": "The sample is a selection out of the universe of all possible relevant cases (e.g., adults in the United States, housing units in three counties of Michigan, etc.) that could have been included in the data collection. A detailed discussion of such things as sampling error or other limitations of the sampling methodology is not required here. Note that some studies, such as censuses, do not utilize samples but include all members of the universe. In such cases, 'inap.' may be entered in Sampling to indicate it was not omitted in error.",
  "icpsrGuidance": "Sampling is distinct from Universe as it describes the methods the researcher used to select or recruit participants among the target population, while Universe describes the whole population the researchers sought to analyze. If no information about Sampling methodology is available, it is not appropriate to simply restate the Universe.",
  "examples": [
    "National sample of telephone numbers from cell (RDD) sampling frame.",
    "The probability sample selected to represent the universe consists of approximately 71,000 households.",
    "The data collection is a pooled cross-sectional time-series of bank robberies in 50 states over a period of 6 years (1970-1975), resulting in 300 observations.",
    "Three target groups were identified: lawyers 36 years of age and above who were members of the American Bar Association (ABA), all the remaining members of the ABA excluding law students, and all lawyers in the nonmember files kept by the ABA. A systematic random probability sample was drawn to represent each of the three groups. The group of young lawyers was oversampled.",
    "The original National Longitudinal Survey of Youth Children and Young Adults 1979 (NLSY79) was a multi-stage, stratified random national sample. Sampling weights are available in the public-use datasets to adjust for minority oversamples and year-to-year attrition. There are mother and child specific weights. Primary Sampling Units (PSUs) were counties and independent cities. PSUs were stratified prior to sampling based on 9 Census divisions and 2 urban/rural classes.",
    "The initial Panel Study of Income Dynamics (PSID) combined two independent samples: a cross-sectional, national sample (based on stratified multistage selection of the civilian noninstitutional population of the U.S.) and a national sample of low-income families. The cross-section sample was an equal probability sample of households in the 48 coterminous states designed to yield about 3,000 completed interviews. The second sample was selected from the Census Bureau's Survey of Economic Opportunity (SEO) using unequal selection probabilities. "
  ]
},
"time_method": {
"description": "The methods used to collect data over time, like snapshots at one point (cross-sectional) or repeatedly (longitudinal) to study changes or trends.",
"type": "array",
"items": {
"type": "string",
"enum": [
"Cross-sectional",
"Cross-sectional ad-hoc follow-up",
"Longitudinal",
"Longitudinal: Cohort / Event-based",
"Longitudinal: Panel",
"Longitudinal: Panel: Continuous",
"Longitudinal: Panel: Interval",
"Longitudinal: Trend / Repeated Cross-section",
"Time Series",
"Time Series: Continuous",
"Time Series: Discrete"
]
},
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/time_method?version=v1",
"examples": [
["Cross-sectional"],
["Longitudinal: Cohort / Event-based", "Time Series"]
]
},
"data_source": {
"description": "The source of the data, when that source is external to the data collection and can be independently cited.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "N/A",
"usageNotes": "Data Source includes such source entities as books, journal articles, administrative records, agency-sponsored surveys, or machine-readable files. Each source includes at minimum the title, author, publication year, and journal (if applicable). Any citation format is accepted.",
"examples": [
[
"'Voting Scores.' Congressional Quarterly Almanac 33 (1977), 487-498"
],
[
"United States Bureau of the Census Economic Surveys, 1998-2000",
"United States Congressional Record, 1989"
],
[
"Annual Company Organization Survey, 2003"
]
]
},
"collection_mode": {
"description": "The method(s) or procedure(s) used to collect the data.",
"type": "array",
"items": {
"type": "string",
"enum": [
"audio computer-assisted self interview (ACASI)",
"audiovisual touch-screen computer-assisted self interview (AVT-CASI)",
"coded on-site observation",
"coded video observation",
"cognitive assessment test",
"computer-assisted personal interview (CAPI)",
"computer-assisted self interview (CASI)",
"computer-assisted telephone interview (CATI)",
"face-to-face interview",
"mail questionnaire",
"mixed mode",
"on-site questionnaire",
"paper and pencil interview (PAPI)",
"record abstracts",
"remote sensing",
"self-enumerated questionnaire",
"telephone audio computer-assisted self interview (TACASI)",
"telephone interview",
"web scraping",
"web-based survey"
]
},
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/collection_mode?version=v1",
"examples": [
["audio computer-assisted self interview (ACASI)"],
["computer-assisted self interview (CASI)", "face-to-face interview"]
]
},
"extent_of_processing": {
  "description": "Processing activities and checks performed on the data collection by ICPSR curation staff.",
  "icpsrGuidance": "This element is displayed to end-users in version history.",
  "type": "array",
  "items": {
    "type": "string",
    "enum": [
      "Checked for undocumented or out-of-range codes",
      "Created online analysis version with question text",
      "Created variable labels and/or value labels",
      "Performed consistency checks",
      "Performed recodes and/or calculated derived variables",
      "Standardized missing values"
    ]
  },
  "$ref": "https://schemas.icpsr.umich.edu/schema/yaml/extent_of_processing?version=v1",
  "examples": [
    [
      "Created variable labels and/or value labels",
      "Standardized missing values",
      "Checked for undocumented or out-of-range codes"
    ],
    [
      "Created online analysis version with question text"
    ]
  ]
},
"weight": {
"description": "The weight variables and the criteria for using them in data analysis or other information about how the data are weighted if no weight variables are present.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Weight includes any information about weighting variables in the data, as well as any other weight information provided by the Principal Investigator. If a weighting formula or coefficient was developed, provide this formula, define its elements, and indicate how the formula is applied to the data. It is acceptable to summarize information contained in documentation and refer users to that documentation for more information.",
"examples": [
"A weight variable with two implied decimal places has been included and must be used in any analysis.",
"Both the TransPop and Cisgender datasets have the same variable named WEIGHT as the weighting variable. The combination datasets have a set of three weight variables (WEIGHT_TRANSPOP, WEIGHT_CISGENDER, WEIGHT_CISGENDER_TRANSPOP). The results will be representative of the sample when the weight is applied. Pages 41 and 42 of the user guide contain instructions that detail how to apply the final sample weight using Stata or SPSS.",
"The 1996 NES dataset includes two final person-level analysis weights which incorporate sampling, nonresponse, and post-stratification factors. One weight (variable #4) is for longitudinal micro-level analysis using the 1996 NES Panel. The other weight (variable #3) is for analysis of the 1996 NES combined sample (Panel component cases plus Cross-section supplement cases). In addition, a Time Series Weight (variable #5) which corrects for Panel attrition was constructed. This weight should be used in analyses which compare the 1996 NES to earlier unweighted National Election Study data collections."
]
},
"response_rates": {
"description": "The percentage of respondents in the sample who participated in the data collection.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Only applicable if the data were collected with a survey instrument and the response rates are provided.",
"examples": [
"The overall response rate for this survey was 20.22%; 72.6% for existing panelists and 10.4% for new panelists, using AAPOR Response Rate 1.",
"The response rate for the pre-election interview was 55.8 percent (66.5 percent for the Panel and 35.2 percent for the Fresh Cross). The response rate for the post-election interview was 89.1 percent (90.1 percent for the Panel and 85.2 percent for the Fresh Cross).",
"Not applicable."
]
},
"scale": {
  "description": "Any commonly known scales used to collect data for the data collection (e.g., MMPI, CPI, the Census Occupational Codes, etc.).",
  "type": "string",
  "controlledVocab": "N/A",
  "$ref": "https://schemas.icpsr.umich.edu/schema/yaml/scale?version=v1",
  "examples": [
    "The baseline data collection included one scale - the CES-D index for maternal depression [Cole, J. C., Rabin, A. S., Smith, T. L., and Kaufman, A. S. (2004). Development and validation of a Rasch-derived CES-D short form. Psychological assessment, 16(4), 360]. All scales used for outcomes at ages 1 through 3 are listed in Appendix Tables 1 and 2 in the User Guide. Please refer to the User Guide and P.I. Codebook, available under the 'Data and Documentation' tab, for details.",
    "Squires, J., Bricker, D. D., and Twombly, E. (2009). Ages and stages questionnaires. Baltimore, MD: Paul H. Brookes.",
    "Briggs-Gowan, M. J., Carter, A. S., Irwin, J. R., Wachtel, K., and Cicchetti, D. V. (2004). The Brief Infant-Toddler Social and Emotional Assessment: screening for social-emotional problems and delays in competence. Journal of pediatric psychology, 29(2), 143-155.",
    "Yu, L., Buysse, D. J., Germain, A., Moul, D. E., Stover, A., Dodds, N. E., ... and Pilkonis, P. A. (2012). Development of short forms from the PROMIS sleep disturbance and sleep-related impairment item banks. Behavioral sleep medicine, 10(1), 6-24."
  ]
},
"unit_of_observation": {
  "description": "The object(s) of analysis for the data collection, such as an organization, individual, or household.",
  "type": "array",
  "controlledVocab": "N/A",
  "items": {
    "type": "string"
  },
  "usageNotes": "Use a brief phrase, for example: 'Individual,' 'Family,' 'Household,' or 'Organization'; when possible, conform to the [DDI Controlled Vocabulary for Analysis Unit](https://vocabularies.cessda.eu/vocabulary/AnalysisUnit).",
  "examples": [
    ["Organization"],
    ["Individual", "Household"],
    ["Family"]
  ]
},
"smallest_geographic_unit": {
"description": "The smallest geographic unit (e.g., state or census tract) used in the dataset.",
"type": "string",
"controlledVocab": "N/A",
"$ref": "https://schemas.icpsr.umich.edu/schema/yaml/smallest_geographic_unit?version=v1",
"examples": [
"state",
"Census tract",
"precinct"
]
},
"restrictions": {
"description": "Rules about how the data collection can be accessed or used.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Restrictions informs users that access to certain variables in a collection may be limited, and that they should contact ICPSR directly to inquire about accessing them.",
"examples": [
"These data may not be used for any purpose other than statistical reporting and analysis. Use of these data to learn the identity of any person or establishment is strictly prohibited. To protect respondent privacy, certain files within this data collection are restricted from general dissemination. To obtain these files, researchers must agree to the terms and conditions of a Restricted Data Use Agreement in accordance with existing ICPSR servicing policies."
]
},
"membership_required": {
  "description": "The availability of the data collection in terms of ICPSR membership. Members-only data may only be downloaded by affiliates of ICPSR member institutions who contribute funding to support the data.",
  "type": "boolean",
  "controlledVocab": "N/A",
  "usageNotes": "True indicates the data are only available to members; false indicates that the data are available to all users. Additional access restrictions (i.e., due to sensitive data or disclosure risks) may still apply.",
  "examples": [true, false]
},
"restricted_access": {
  "description": "General indication of any access restrictions associated with the data collection. More detailed information is provided in the Restrictions element.",
  "type": "boolean",
  "controlledVocab": "N/A",
  "usageNotes": "True indicates that an access restriction is associated with the data collection; false indicates no access restrictions are present. Additional membership requirements may still apply.",
  "examples": [true, false]
},
"changes_to_collection": {
"description": "A record of how the data collection has changed over time.",
"type": "array",
"controlledVocab": "N/A",
"items": {
"type": "object",
"properties": {
"date": {
"description": "The date on which an update occurred. ICPSR automatically generates this date.",
"type": "string",
"format": "date",
"examples": [ "2006-03-30", "2019-05-05" ]
},
"note": {
"description": "An explanation of the nature of the update.",
"type": "string",
"examples": [
"File CB3025.ALL.PDF was removed from any previous datasets and flagged as a study-level file, so that it will accompany all downloads.",
"The data producer provided additional data files.",
"SAS and SPSS setup files were created.",
"The codebook descriptions of variables TANSUP, EMOSUP, and SOCSUP were corrected."
]
}
},
"additionalProperties": false
},
"usageNotes": "Textual changes are recorded only when data or documentation files are updated or added to the data collection (and the Version number is incremented).",
"examples": [
[
{
"date": "2003-09-10",
"note": "A variable specifying the date of interview has been added to the collection."
},
{
"date": "2003-12-09",
"note": "The codebook descriptions of variables TANSUP, EMOSUP, and SOCSUP were corrected."
}
]
]
},
"series": {
  "description": "A named collection of related studies.",
  "type": "string",
  "controlledVocab": "[ICPSR Series](https://www.icpsr.umich.edu/web/ICPSR/search/series)",
  "usageNotes": "Typically the studies in an ICPSR series are produced by the same group of investigators, and either explore different facets of the same topic, or repeat the same investigation over time. Each series name is given in title case (all major words are capitalized, while minor words are lowercased) and ends with the word 'Series'.",
  "examples": [
    "American National Election Study (ANES) Series",
    "Census of Population and Housing, 1990 [United States] Series",
    "National Black Election Study Series",
    "Study of Women's Health Across the Nation (SWAN) Series"
  ]
},
"classification": {
"description": "Topics used to organize data collections and help users explore the ICPSR catalog.",
"type": "array",
"items": {
"type": "string"
},
"controlledVocab": "[ICPSR Topic Classifications](https://www.icpsr.umich.edu/web/pages/ICPSR/access/subject.html)",
"icpsrGuidance": "When there are multiple subclassifications, the smallest (most detailed) one is chosen to represent the data collection. Each data collection must have at least one ICPSR classification, as well as a classification from the collection's home archive. Classifications can also be used to cross-list a study among multiple archives. Curators should choose the topical classifications that best match the study's focus.",
"examples": [
["I.A.2. Census Enumerations: Historical and Contemporary Population Characteristics, United States, American Housing Survey Series"],
["XVII.C.1. Social Institutions and Behavior, Socialization, Students, and Youth, United States"]
]
},
"filesets": {
"description": "The grouping of files in the data collection.",
"type": "array",
"controlledVocab": "N/A",
"items": {
"type": "object",
"properties": {
"number": {
"description": "A number that uniquely identifies a 'part' or component file that is associated with the data collection.",
"type": "integer",
"usageNotes": "Fileset numbers are typically (but not always) consecutive integers beginning with 1. (In some cases, the number may be drawn from an external resource, such as FIPS state and county codes.) The numbers correspond to the 'part numbers' embedded in ICPSR standard filenames.",
"examples": [1, 2, 3]
},
"name": {
"description": "A brief title used to distinguish each fileset within a data collection.",
"type": "string",
"controlledVocab": "N/A",
"usageNotes": "Fileset Names are required for data collections that include multiple Filesets. If a data collection includes only one fileset, a Fileset Name is not included. Fileset Names use title case (all main words are capitalized) and do not begin with articles (a, the) or dates.",
"examples": [
"Each Region, Wealth Summary: Middle Colonies (MIDLCOL)",
"Each Region, Wealth Summary: New England (NEWENGL)",
"Northbound Public-Use Data",
"Northbound Restricted-Use Data"
]
},
"sda_note": {
"description": "Additional information about the fileset for the purpose of helping online analysis users.",
"type": "string",
"controlledVocab": "N/A",
"examples": [
"Please note that the AABS provides estimates for 32 states. It also supplies arts participation estimates for 11 metropolitan areas. Users are encouraged to review the Data Collection Notes on the Study Description page for specific states and metropolitan areas.",
"Please note that the replicate weights are needed to obtain accurate standard error estimates. Users are advised to download the data to use the replicate weights. Users should refer to the study description page or User Guide for further details regarding weights."
]
}
},
"required": ["number"],
"dependentRequired": { "name": ["number"] },
"additionalProperties": false
},
"usageNotes": "Filesets are used at ICPSR to make a convenient package for description, discovery, preservation and dissemination -- a package that is smaller than the data collection but larger than the individual file. A fileset typically contains a single file of statistical data plus additional files that support the data -- such as setups for statistical software, documentation, and alternative data representations. Every ICPSR data collection with at least one file must have at least one defined Fileset; a data collection may have multiple filesets. Each Fileset has a Number, and may also have a Name and an SDA (Survey Documentation and Analysis) Note.",
"examples": [
[
{
"number": 1
}
],
[
{
"number": 1,
"name": "Northbound Public-Use Data"
},
{
"number": 2,
"name": "Northbound Restricted-Use Data"
}
],
[
{
"number": 1,
"name": "Original File"
},
{
"number": 2,
"name": "Replicate Weight File",
"sda_note": "Please note that the replicate weights are needed to obtain accurate standard error estimates. Users are advised to download the data to use the replicate weights. Users should refer to the study description page or User Guide for further details regarding weights."
}
]
]
}
}
}