{"source":"aws-open-data","name":"AWS open data","kind":"widget","records":[{"id":"1000-genomes-data-lakehouse-ready","title":"1000 Genomes Phase 3 Reanalysis with DRAGEN 3.5 - Data Lakehouse Ready","subtitle":"Sample Queries on the 1000 Genomes, gnomAD and ClinVar data Lake · biology, bioinformatics, genetic, genomic","value":"biology","props":{"slug":"1000-genomes-data-lakehouse-ready","providers":["Sample Queries on the 1000 Genomes, gnomAD and ClinVar data Lake"],"tags":["biology","bioinformatics","genetic","genomic","Homo sapiens","life sciences","parquet","population genetics","vcf"]}},{"id":"1000-genomes","title":"1000 Genomes","subtitle":"Exploring Human Genetic Variation with 1000 Genomes in a Browser-Based Notebook on Scigantic, Examine genomic variation across populations with AWS · aws-pds, genetic, genomic, life sciences","value":"aws-pds","props":{"slug":"1000-genomes","providers":["Exploring Human Genetic Variation with 1000 Genomes in a Browser-Based Notebook on Scigantic","Examine genomic variation across populations with AWS","Exploratory data analysis of genomic datasets using ADAM and Mango with Apache Spark on Amazon EMR"],"tags":["aws-pds","genetic","genomic","life sciences","whole genome sequencing","fastq"]}},{"id":"1kg-ont-vienna","title":"1KG-ONT-VIENNA panel","subtitle":"Long-read sequencing and structural variant characterization in 1,019 samples from the 1000 Genomes Project · genetic, genomic, life sciences, whole genome sequencing","value":"genetic","props":{"slug":"1kg-ont-vienna","providers":["Long-read sequencing and structural variant characterization in 1,019 samples from the 1000 Genomes Project"],"tags":["genetic","genomic","life sciences","whole genome sequencing","fastq","fast5"]}},{"id":"3dcompat","title":"3DCoMPaT: Composition of Materials on Parts of 3D Things","subtitle":"3DCoMPaT: Composition of Materials on Parts of 3D Things · aws-pds, computer vision, machine learning","value":"aws-pds","props":{"slug":"3dcompat","providers":["3DCoMPaT: Composition of Materials on Parts of 3D Things"],"tags":["aws-pds","computer vision","machine learning"]}},{"id":"3kricegenome","title":"3000 Rice Genomes Project","subtitle":"RiceGalaxy, Tracking the origin of two genetic components associated with transposable element bursts in domesticated rice · agriculture, food security, aws-pds, genetic","value":"agriculture","props":{"slug":"3kricegenome","providers":["RiceGalaxy","Tracking the origin of two genetic components associated with transposable element bursts in domesticated rice","Structural variants in 3000 rice genomes","Rice Galaxy: an open resource for plant science","Identification and Allele Combination Analysis of Rice Grain Shape-Related Genes by Genome-Wide Association Study"],"tags":["agriculture","food security","aws-pds","genetic","genomic","life sciences"]}},{"id":"4dnucleome","title":"4D Nucleome (4DN)","subtitle":"Finding and Downloading 4DN Data files, Using jupyterhub on the 4DN data portal · biology, bioinformatics, genetic, genomic","value":"biology","props":{"slug":"4dnucleome","providers":["Finding and Downloading 4DN Data files","Using jupyterhub on the 4DN data portal"],"tags":["biology","bioinformatics","genetic","genomic","imaging","life sciences","aws-pds"]}},{"id":"990-spreadsheets","title":"IRS 990 Filings (Spreadsheets)","subtitle":" · aws-pds, regulatory, statistics, us","value":"aws-pds","props":{"slug":"990-spreadsheets","providers":[],"tags":["aws-pds","regulatory","statistics","us","economics"]}},{"id":"abeja-cc-ja","title":"ABEJA CC JA","subtitle":"Tutorial of ABEJA CC JA dataset, Building a Large-Scale Japanese Corpus from Common Crawl and Its Preprocessing · natural language processing, web archive, internet, japanese","value":"natural language processing","props":{"slug":"abeja-cc-ja","providers":["Tutorial of ABEJA CC JA dataset","Building a Large-Scale Japanese Corpus from Common Crawl and Its Preprocessing"],"tags":["natural language processing","web archive","internet","japanese"]}},{"id":"aef-source","title":"Google Satellite Embedding V1","subtitle":" · aws-pds, machine learning, satellite imagery, aerial imagery","value":"aws-pds","props":{"slug":"aef-source","providers":[],"tags":["aws-pds","machine learning","satellite imagery","aerial imagery","earth observation","imaging"]}},{"id":"aev-a2d2","title":"A2D2: Audi Autonomous Driving Dataset","subtitle":"Autonomous Driving Data Service (ADDS) · autonomous vehicles, deep learning, computer vision, lidar","value":"autonomous vehicles","props":{"slug":"aev-a2d2","providers":["Autonomous Driving Data Service (ADDS)"],"tags":["autonomous vehicles","deep learning","computer vision","lidar","mapping","machine learning","robotics","aws-pds"]}},{"id":"africa-field-boundary-labels","title":"A region-wide, multi-year set of crop field boundary labels for Africa","subtitle":"Instructions on data access and label-making demonstration notebook, Generalization enhancement strategies to enable cross-year cropland mapping with convolutional neural networks trained using historical samples · agriculture, machine learning, land cover, satellite imagery","value":"agriculture","props":{"slug":"africa-field-boundary-labels","providers":["Instructions on data access and label-making demonstration notebook","Generalization enhancement strategies to enable cross-year cropland mapping with convolutional neural networks trained using historical samples","A region-wide, multi-year set of crop field boundary labels for Africa","A region-wide, multi-year set of crop field boundary labels for Africa","Technical report on label develop and processing"],"tags":["agriculture","machine learning","land cover","satellite imagery","cog","labeled"]}},{"id":"afsis","title":"Africa Soil Information Service (AfSIS) Soil Chemistry","subtitle":"AfSIS Soil Chemistry - Usage Tutorial, Goalkeepers 2018, Soil - The Big Data Beneath Your Feet · agriculture, aws-pds, environmental, food security","value":"agriculture","props":{"slug":"afsis","providers":["AfSIS Soil Chemistry - Usage Tutorial","Goalkeepers 2018, Soil - The Big Data Beneath Your Feet"],"tags":["agriculture","aws-pds","environmental","food security","machine learning"]}},{"id":"ag-loam","title":"AG-LOAM Dataset","subtitle":"Source code of the LiDAR-only odometry and mapping system, Adaptive LiDAR Odometry and Mapping for Autonomous Agricultural Mobile Robots in Unmanned Farms · aws-pds, robotics, agriculture, lidar","value":"aws-pds","props":{"slug":"ag-loam","providers":["Source code of the LiDAR-only odometry and mapping system","Adaptive LiDAR Odometry and Mapping for Autonomous Agricultural Mobile Robots in Unmanned Farms","Adaptive LiDAR Odometry and Mapping for Autonomous Agricultural Mobile Robots in Unmanned Farms"],"tags":["aws-pds","robotics","agriculture","lidar","localization","mapping"]}},{"id":"ai3","title":"AI3 Protein-Ligand Binding Affinity Dataset","subtitle":"AI3: Protein-Ligand Binding Affinity Dataset, PLAS-5k: Dataset of Protein-Ligand Affinities from Molecular Dynamics for Machine Learning Applications · pharmaceutical, simulations, health, life sciences","value":"pharmaceutical","props":{"slug":"ai3","providers":["AI3: Protein-Ligand Binding Affinity Dataset","PLAS-5k: Dataset of Protein-Ligand Affinities from Molecular Dynamics for Machine Learning Applications","PLAS-20k: Extended Dataset of Protein-Ligand Affinities from MD Simulations for Machine Learning Applications"],"tags":["pharmaceutical","simulations","health","life sciences","machine learning","protein","molecular dynamics","aws-pds"]}},{"id":"airborne-object-tracking","title":"Airborne Object Tracking Dataset","subtitle":" · amazon.science, computer vision, deep learning, machine learning","value":"amazon.science","props":{"slug":"airborne-object-tracking","providers":[],"tags":["amazon.science","computer vision","deep learning","machine learning"]}},{"id":"aiwp","title":"AI Weather Prediction (AIWP) Model Reforecasts","subtitle":" · environmental, meteorological, weather","value":"environmental","props":{"slug":"aiwp","providers":[],"tags":["environmental","meteorological","weather"]}},{"id":"allen-brain-observatory","title":"Allen Brain Observatory - Visual Coding AWS Public Data Set","subtitle":"Use the Allen Brain Observatory – Visual Coding on AWS · aws-pds, neurobiology, neuroimaging, image processing","value":"aws-pds","props":{"slug":"allen-brain-observatory","providers":["Use the Allen Brain Observatory – Visual Coding on AWS"],"tags":["aws-pds","neurobiology","neuroimaging","image processing","imaging","life sciences","signal processing","electrophysiology","Mus musculus"]}},{"id":"allen-cell-imaging-collections","title":"Allen Cell Imaging Collections","subtitle":"Allen Cell Feature Explorer, Allen Cell Structure Segmenter · aws-pds, life sciences, biology, cell imaging","value":"aws-pds","props":{"slug":"allen-cell-imaging-collections","providers":["Allen Cell Feature Explorer","Allen Cell Structure Segmenter","Pytorch 3D Integrated Cell","AICS Volume Viewer","Visual Guide to Human Cells"],"tags":["aws-pds","life sciences","biology","cell imaging","cell biology","microscopy","image processing","machine learning","Homo sapiens"]}},{"id":"allen-hmba-releases","title":"Human and Mammalian Brain Atlas","subtitle":"Human-Mammalian Brain - Basal Ganglia - Data, Human-Mammalian Brain - CCF Book · aws-pds, biology, gene expression, neurobiology","value":"aws-pds","props":{"slug":"allen-hmba-releases","providers":["Human-Mammalian Brain - Basal Ganglia - Data","Human-Mammalian Brain - CCF Book","HMBA Basal Ganglia resources in Brain Knowledge Platform's Data Catalog"],"tags":["aws-pds","biology","gene expression","neurobiology","life sciences","single-cell transcriptomics","Mus musculus","Homo sapiens","non-human primate"]}},{"id":"allen-it-connectivity","title":"Allen institute intratelencephalic neuron connectivity paper supplemental data","subtitle":"IT-circuit-Figures-clean, Cell-type-specific parallel pathways in the canonical cortical microcircuit · electron microscopy, imaging, neuroscience, connectomics","value":"electron microscopy","props":{"slug":"allen-it-connectivity","providers":["IT-circuit-Figures-clean","Cell-type-specific parallel pathways in the canonical cortical microcircuit"],"tags":["electron microscopy","imaging","neuroscience","connectomics","life sciences","aws-pds"]}},{"id":"allen-ivy-glioblastoma-atlas","title":"Allen Ivy Glioblastoma Atlas","subtitle":"Accessing Ivy Glioblastoma Atlas Project data, Ivy Glioblastoma Atlas Project · aws-pds, biology, genetic, gene expression","value":"aws-pds","props":{"slug":"allen-ivy-glioblastoma-atlas","providers":["Accessing Ivy Glioblastoma Atlas Project data","Ivy Glioblastoma Atlas Project","An anatomic transcriptional atlas of human glioblastoma"],"tags":["aws-pds","biology","genetic","gene expression","imaging","neurobiology","image processing","life sciences","machine learning","computer vision","cancer","glioblastoma","Homo sapiens"]}},{"id":"allen-mouse-brain-atlas","title":"Allen Mouse Brain Atlas","subtitle":"Accessing Allen Mouse Brain Atlas data, Allen Mouse Brain Atlas · aws-pds, biology, genetic, gene expression","value":"aws-pds","props":{"slug":"allen-mouse-brain-atlas","providers":["Accessing Allen Mouse Brain Atlas data","Allen Mouse Brain Atlas","Genome-wide atlas of gene expression in the adult mouse brain"],"tags":["aws-pds","biology","genetic","gene expression","imaging","neurobiology","image processing","life sciences","transcriptomics","Mus musculus"]}},{"id":"allen-nd-ephys-compression","title":"Allen Institute for Neural Dynamics - Extracellular Electrophysiology Compression Benchmark","subtitle":"AIND ephys compression benchmark data, Compression strategies for large-scale electrophysiology data · aws-pds, neurobiology, life sciences, signal processing","value":"aws-pds","props":{"slug":"allen-nd-ephys-compression","providers":["AIND ephys compression benchmark data","Compression strategies for large-scale electrophysiology data"],"tags":["aws-pds","neurobiology","life sciences","signal processing","electrophysiology","Mus musculus"]}},{"id":"allen-nd-ephys-hybrid-evaluation","title":"Allen Institute for Neural Dynamics - Extracellular Electrophysiology Hybrid Evaluation Benchmark","subtitle":"AIND ephys hybrid evaluation benchmark data, Efficient and reproducible pipelines for spike sorting large-scale electrophysiology data · aws-pds, neurobiology, life sciences, signal processing","value":"aws-pds","props":{"slug":"allen-nd-ephys-hybrid-evaluation","providers":["AIND ephys hybrid evaluation benchmark data","Efficient and reproducible pipelines for spike sorting large-scale electrophysiology data"],"tags":["aws-pds","neurobiology","life sciences","signal processing","electrophysiology","Mus musculus"]}},{"id":"allen-nd-open-data","title":"Allen Institute for Neural Dynamics - Mouse Neuroanatomy and Physiology Data","subtitle":"AIND Open Data Access · aws-pds, neurobiology, neuroimaging, image processing","value":"aws-pds","props":{"slug":"allen-nd-open-data","providers":["AIND Open Data Access"],"tags":["aws-pds","neurobiology","neuroimaging","image processing","imaging","life sciences","signal processing","electrophysiology","Mus musculus"]}},{"id":"allen-sea-ad-atlas","title":"Seattle Alzheimer's Disease Brain Cell Atlas (SEA-AD)","subtitle":"Seattle Alzheimer’s Disease Brain Cell Atlas · aws-pds, biology, cell biology, cell imaging","value":"aws-pds","props":{"slug":"allen-sea-ad-atlas","providers":["Seattle Alzheimer’s Disease Brain Cell Atlas"],"tags":["aws-pds","biology","cell biology","cell imaging","epigenomics","gene expression","histopathology","Homo sapiens","imaging","medicine","microscopy","neurobiology","neuroscience","single-cell transcriptomics","transcriptomics","life sciences"]}},{"id":"allen-synphys","title":"Allen Institute for Brain Science - Synaptic Physiology Public Data Set","subtitle":"aisynphys python package for accessing synaptic physiology data, Local connectivity and synaptic dynamics in mouse and human neocortex · aws-pds, neurobiology, life sciences, signal processing","value":"aws-pds","props":{"slug":"allen-synphys","providers":["aisynphys python package for accessing synaptic physiology data","Local connectivity and synaptic dynamics in mouse and human neocortex"],"tags":["aws-pds","neurobiology","life sciences","signal processing","electrophysiology","Mus musculus","Homo sapiens"]}},{"id":"allenai-arc","title":"AI2 Reasoning Challenge (ARC) 2018","subtitle":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challengg · aws-pds, machine learning, json, csv","value":"aws-pds","props":{"slug":"allenai-arc","providers":["Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challengg"],"tags":["aws-pds","machine learning","json","csv"]}},{"id":"allenai-aristo-mini","title":"Aristo Mini Corpus","subtitle":" · aws-pds, machine learning, json, csv","value":"aws-pds","props":{"slug":"allenai-aristo-mini","providers":[],"tags":["aws-pds","machine learning","json","csv"]}},{"id":"allenai-diagrams","title":"AI2 Diagram Dataset (AI2D)","subtitle":"A Diagram is Worth a Dozen Images · aws-pds, machine learning","value":"aws-pds","props":{"slug":"allenai-diagrams","providers":["A Diagram is Worth a Dozen Images"],"tags":["aws-pds","machine learning"]}},{"id":"allenai-drop","title":"Discrete Reasoning Over the content of Paragraphs (DROP)","subtitle":"DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-drop","providers":["DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs"],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"allenai-meaningful-citations","title":"AI2 Meaningful Citations Data Set","subtitle":"Identifying Meaningful Citations · aws-pds, machine learning, csv","value":"aws-pds","props":{"slug":"allenai-meaningful-citations","providers":["Identifying Meaningful Citations"],"tags":["aws-pds","machine learning","csv"]}},{"id":"allenai-quoref","title":"Quoref","subtitle":"Quoref: A Reading Comprehension Dataset with Questions Requiring Coreferential Reasoning · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-quoref","providers":["Quoref: A Reading Comprehension Dataset with Questions Requiring Coreferential Reasoning"],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"allenai-ropes","title":"Reasoning Over Paragraph Effects in Situations (ROPES)","subtitle":"Reasoning Over Paragraph Effects in Situations · aws-pds, machine learning, natural language processing, json","value":"aws-pds","props":{"slug":"allenai-ropes","providers":["Reasoning Over Paragraph Effects in Situations"],"tags":["aws-pds","machine learning","natural language processing","json"]}},{"id":"allenai-tablestore-questions","title":"AI2 TabMCQ: Multiple Choice Questions aligned with the Aristo Tablestore","subtitle":" · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-tablestore-questions","providers":[],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"allenai-tablestore","title":"AI2 Tablestore (November 2015 Snapshot)","subtitle":" · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-tablestore","providers":[],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"allenai-tqa","title":"Textbook Question Answering (TQA)","subtitle":" · ","value":"","props":{"slug":"allenai-tqa","providers":[],"tags":[]}},{"id":"allenai-tuple-kb","title":"Aristo Tuple KB","subtitle":" · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-tuple-kb","providers":[],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"allenai-zest","title":"ZEST: ZEroShot  learning  from Task descriptions","subtitle":" · aws-pds, machine learning, natural language processing","value":"aws-pds","props":{"slug":"allenai-zest","providers":[],"tags":["aws-pds","machine learning","natural language processing"]}},{"id":"alliance-genome-resources","title":"Alliance of Genome Resources","subtitle":"Alliance of Genome Resources AWS Data Access Tutorials, Alliance of Genome Resources Portal · aws-pds, genomic, bioinformatics, biology","value":"aws-pds","props":{"slug":"alliance-genome-resources","providers":["Alliance of Genome Resources AWS Data Access Tutorials","Alliance of Genome Resources Portal","FlyBase - Drosophila Database","WormBase - C. elegans Database","ZFIN - Zebrafish Database"],"tags":["aws-pds","genomic","bioinformatics","biology","gene expression","life sciences","genetic","genome","Drosophila melanogaster","Caenorhabditis elegans","Danio rerio","Mus musculus","Rattus norvegicus","Homo sapiens","transcriptomics","protein","vcf","fasta"]}},{"id":"allthebacteria","title":"AllTheBacteria","subtitle":"AllTheBacteria - all bacterial genomes assembled, available and searchable · assembly, bacteria, bioinformatics, fasta","value":"assembly","props":{"slug":"allthebacteria","providers":["AllTheBacteria - all bacterial genomes assembled, available and searchable"],"tags":["assembly","bacteria","bioinformatics","fasta","genomic","life sciences","microbial genomics","short read sequencing","whole genome sequencing"]}},{"id":"amazon-berkeley-objects","title":"Amazon Berkeley Objects Dataset","subtitle":" · amazon.science, computer vision, deep learning, information retrieval","value":"amazon.science","props":{"slug":"amazon-berkeley-objects","providers":[],"tags":["amazon.science","computer vision","deep learning","information retrieval","machine learning","machine translation"]}},{"id":"amazon-bin-imagery","title":"Amazon Bin Image Dataset","subtitle":"Amazon Bin Image Dataset Challenge, Amazon Inventory Reconciliation using AI · amazon.science, computer vision, machine learning","value":"amazon.science","props":{"slug":"amazon-bin-imagery","providers":["Amazon Bin Image Dataset Challenge","Amazon Inventory Reconciliation using AI"],"tags":["amazon.science","computer vision","machine learning"]}},{"id":"amazon-conversational-product-search","title":"VoiSeR","subtitle":"VoiSeR: A New Benchmark for Voice-Based Search Refinement · amazon.science, natural language processing, machine learning, information retrieval","value":"amazon.science","props":{"slug":"amazon-conversational-product-search","providers":["VoiSeR: A New Benchmark for Voice-Based Search Refinement"],"tags":["amazon.science","natural language processing","machine learning","information retrieval"]}},{"id":"amazon-last-mile-challenges","title":"2021 Amazon Last Mile Routing Research Challenge Dataset","subtitle":"Code repository used for the 2021 Amazon Routing Research Challenge (this repository is included for reference and documentation purposes only, you do not need to install it to access the data), AWS Last Mile Route Sequence Optimization · transportation, machine learning, deep learning, amazon.science","value":"transportation","props":{"slug":"amazon-last-mile-challenges","providers":["Code repository used for the 2021 Amazon Routing Research Challenge (this repository is included for reference and documentation purposes only, you do not need to install it to access the data)","AWS Last Mile Route Sequence Optimization","2021 Amazon Last Mile Routing Research Challenge: Data Set","Can language models be used for real-world urban-delivery route optimization?","Constrained Local Search for Last-Mile Routing"],"tags":["transportation","machine learning","deep learning","amazon.science","urban","analytics","geospatial","logistics","last mile","optimization","routing"]}},{"id":"amazon-pqa","title":"Amazon-PQA","subtitle":"Answering Product-Questions by Utilizing Questions from Other Contextually Similar Products · amazon.science, natural language processing, machine learning","value":"amazon.science","props":{"slug":"amazon-pqa","providers":["Answering Product-Questions by Utilizing Questions from Other Contextually Similar Products"],"tags":["amazon.science","natural language processing","machine learning"]}},{"id":"amazon-reviews-ml","title":"The Multilingual Amazon Reviews Corpus","subtitle":"The Multilingual Amazon Reviews Corpus · amazon.science, natural language processing, machine learning","value":"amazon.science","props":{"slug":"amazon-reviews-ml","providers":["The Multilingual Amazon Reviews Corpus"],"tags":["amazon.science","natural language processing","machine learning"]}},{"id":"amazon-seller-contact-intent-sequence","title":"Amazon Seller Contact Intent Sequence","subtitle":" · amazon.science, machine learning, temporal point process, Hawkes Process","value":"amazon.science","props":{"slug":"amazon-seller-contact-intent-sequence","providers":[],"tags":["amazon.science","machine learning","temporal point process","Hawkes Process"]}},{"id":"amazonia","title":"Amazonia EO satellite on AWS","subtitle":"Keeping a SpatioTemporal Asset Catalog (STAC) Up To Date with SNS/SQS, The Evolution of ASDI's Data Infrastructure · aws-pds, agriculture, earth observation, geospatial","value":"aws-pds","props":{"slug":"amazonia","providers":["Keeping a SpatioTemporal Asset Catalog (STAC) Up To Date with SNS/SQS","The Evolution of ASDI's Data Infrastructure","STAC V1.0.0 endpoint","Amazonia 1 stactools package","Amazonia 1 stactools-pipeline"],"tags":["aws-pds","agriculture","earth observation","geospatial","imaging","satellite imagery","sustainability","disaster response","stac","cog"]}},{"id":"answer-reformulation","title":"Answer Reformulation","subtitle":"Voice-based Reformulation of Community Answers · amazon.science, natural language processing, machine learning","value":"amazon.science","props":{"slug":"answer-reformulation","providers":["Voice-based Reformulation of Community Answers"],"tags":["amazon.science","natural language processing","machine learning"]}}],"count":50,"generated_at":"2026-06-13T07:16:45.950Z"}