{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Partitioning and formatting of the raw vioscreen information.\n", "\n", "This notebook consumes the raw vioscreen data, and splits it up into a series of pivot tables.\n", "\n", "The pivot tables are indexed by the survey ID. A survey ID corresponds to a single instance of a vioscreen FFQ. Separate mappings, not included at present, relate the survey ID to a host subject identifier and/or sample barcode.\n", "\n", "All headers are scrubbed to avoid annoying characters like spaces. This is done for the benefit of tools like QIIME, however it does reduce readability in some cases.\n", "\n", "A roundtrip test going from raw data to the summarized micro / macronutrients is included at the end of this notebook." ] }, { "cell_type": "code", "execution_count": 283, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import re\n", "import numpy as np\n", "from collections import defaultdict" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# full raw vioscreen data\n", "full_vios = pd.read_csv('vioscreen_dump.tsv.gz', sep='\\t', dtype=str)" ] }, { "cell_type": "code", "execution_count": 355, "metadata": {}, "outputs": [], "source": [ "# a raw dump of the ag.source_barcodes_surveys table\n", "survey_sample_map_raw = pd.read_csv('sid_bc.tsv', \n", " sep='\\t', dtype=str).set_index('survey_id')['barcode']\n", "survey_sample_map = defaultdict(list)\n", "for i, s in survey_sample_map_raw.items():\n", " survey_sample_map[i].append('10317.%s' % s)" ] }, { "cell_type": "code", "execution_count": 339, "metadata": {}, "outputs": [], "source": [ "def drop_null_cols(df):\n", " \"\"\"Drop a column that is entirely null\"\"\"\n", " drop = []\n", " n = len(df)\n", " for c in df.columns:\n", " if df[c].isnull().sum() == n:\n", " drop.append(c)\n", " return df.drop(columns=drop)\n", "\n", "replacer = re.compile(\"[ \\-,()%':]\")\n", "def sanitize_columns(df):\n", " \"\"\"Remove possibly annoying characters\"\"\"\n", " df.columns = [replacer.sub('_', c) for c in df.columns]\n", " return df\n", "\n", "def remap_to_sample_id(df):\n", " new_rows = []\n", " sample_ids = []\n", " for _, row in df.reset_index().iterrows():\n", " for id_ in survey_sample_map.get(row['survey_id'], []):\n", " new_rows.append(row.copy())\n", " sample_ids.append(id_)\n", " new_df = pd.DataFrame(new_rows)\n", " new_df['#SampleID'] = sample_ids\n", " return new_df.set_index('#SampleID')" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [], "source": [ "# partition the raw vioscreen data into logically related units.\n", "# within each partition, columns that are entirely null are removed.\n", "\n", "# a few specific values are used for this:\n", "# - \"code\" should correspond to a specific micro/macro nutrient or summarized \n", "# value (e.g., % calories from alcohol)\n", "# - \"data\" contains the raw micro/macro nutrient information for the given food\n", "# entry for the given survey scaled (I believe) \"servingFrequencyText\" and the \"servingSizeText\". \n", "\n", "# where the \"code\" column is not null\n", "vios_coded = drop_null_cols(full_vios[~full_vios.code.isnull()])\n", "\n", "# what should be food components (e.g., % protein)\n", "vios_coded_percents = drop_null_cols(vios_coded[~vios_coded.foodComponentType.isnull()])\n", "\n", "# what should be only micro and macronutriet data\n", "vios_coded_micromacro = drop_null_cols(vios_coded[np.logical_and(vios_coded.foodComponentType.isnull(),\n", " vios_coded.description != 'Eating Pattern')])\n", "\n", "# what vioscreen terms eating patterns (e.g., added fats per day)\n", "vios_coded_eatingpattern = drop_null_cols(vios_coded[vios_coded.description == 'Eating Pattern'])\n", "\n", "# entries that are not coded\n", "vios_not_coded = drop_null_cols(full_vios[full_vios.code.isnull()])\n", "\n", "# contains the raw vioscreen json output (more below)\n", "vios_with_data = drop_null_cols(vios_not_coded[~vios_not_coded.data.isnull()])\n", "\n", "# the vioscreen scores (e.g., Total Fruits)\n", "vios_no_data = drop_null_cols(vios_not_coded[vios_not_coded.data.isnull()])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Formatting food components" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountcodedescriptionfoodComponentTypeprecisionshortDescriptionsurvey_idunits
15315.49482152820182%proteinPercent of calories from Protein1.00.0Protein80043f5209506497%
15444.07939989979141%fatPercent of calories from Fat1.00.0Fat80043f5209506497%
15540.42577857200676%carboPercent of calories from Carbohydrate1.00.0Carbohydrate80043f5209506497%
1560.0%alcoholPercent of calories from Alcohol1.00.0Alcohol80043f5209506497%
15716.382593411285388%sfatotPercent of calories from Saturated Fat1.00.0Saturated Fat80043f5209506497%
\n", "
" ], "text/plain": [ " amount code description \\\n", "153 15.49482152820182 %protein Percent of calories from Protein \n", "154 44.07939989979141 %fat Percent of calories from Fat \n", "155 40.42577857200676 %carbo Percent of calories from Carbohydrate \n", "156 0.0 %alcohol Percent of calories from Alcohol \n", "157 16.382593411285388 %sfatot Percent of calories from Saturated Fat \n", "\n", " foodComponentType precision shortDescription survey_id units \n", "153 1.0 0.0 Protein 80043f5209506497 % \n", "154 1.0 0.0 Fat 80043f5209506497 % \n", "155 1.0 0.0 Carbohydrate 80043f5209506497 % \n", "156 1.0 0.0 Alcohol 80043f5209506497 % \n", "157 1.0 0.0 Saturated Fat 80043f5209506497 % " ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_percents.head()" ] }, { "cell_type": "code", "execution_count": 356, "metadata": {}, "outputs": [], "source": [ "vios_coded_percents_pivot = vios_coded_percents.pivot(index='survey_id', columns='description', values='amount')\n", "vios_coded_percents_pivot = remap_to_sample_id(sanitize_columns(vios_coded_percents_pivot))\n", "vios_coded_percents_pivot.to_csv('vioscreen_component_percents.tsv', sep='\\t', index=True, header=True)" ] }, { "cell_type": "code", "execution_count": 357, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survey_idPercent_of_calories_from_Added_SugarPercent_of_calories_from_AlcoholPercent_of_calories_from_CarbohydratePercent_of_calories_from_FatPercent_of_calories_from_Monounsaturated_FatPercent_of_calories_from_Polyunsaturated_FatPercent_of_calories_from_ProteinPercent_of_calories_from_Saturated_Fat
#SampleID
10317.0000722910000904d9779a86c0.52780941488965524.40570793074189627.62818967046378745.08447761866925420.9562133190077812.55991201588331522.88162478012506510.631030850565056
10317.000107687000fc7ac577765790.460659717473422940.023.47477932865937355.74595413173178622.4894765492720948.21960181230924120.7792665396088422.733751192146702
10317.0000876630012cf765bb386ea0.86577856467636330.0002396235700214125437.527984172228843.0481462887077121.85414256395016610.35192853067319.423629915493469.169527931007428
10317.000030578001b4c3e1c848f580.8644366142736037.64362487567000738.4190548808320341.8275313241228521.0202846763430788.664538309722112.10978891937511311.298838478082244
10317.000030579001b4c3e1c848f580.8644366142736037.64362487567000738.4190548808320341.8275313241228521.0202846763430788.664538309722112.10978891937511311.298838478082244
\n", "
" ], "text/plain": [ " survey_id Percent_of_calories_from_Added_Sugar \\\n", "#SampleID \n", "10317.000072291 0000904d9779a86c 0.5278094148896552 \n", "10317.000107687 000fc7ac57776579 0.46065971747342294 \n", "10317.000087663 0012cf765bb386ea 0.8657785646763633 \n", "10317.000030578 001b4c3e1c848f58 0.864436614273603 \n", "10317.000030579 001b4c3e1c848f58 0.864436614273603 \n", "\n", " Percent_of_calories_from_Alcohol \\\n", "#SampleID \n", "10317.000072291 4.405707930741896 \n", "10317.000107687 0.0 \n", "10317.000087663 0.00023962357002141254 \n", "10317.000030578 7.643624875670007 \n", "10317.000030579 7.643624875670007 \n", "\n", " Percent_of_calories_from_Carbohydrate \\\n", "#SampleID \n", "10317.000072291 27.628189670463787 \n", "10317.000107687 23.474779328659373 \n", "10317.000087663 37.5279841722288 \n", "10317.000030578 38.41905488083203 \n", "10317.000030579 38.41905488083203 \n", "\n", " Percent_of_calories_from_Fat \\\n", "#SampleID \n", "10317.000072291 45.084477618669254 \n", "10317.000107687 55.745954131731786 \n", "10317.000087663 43.04814628870771 \n", "10317.000030578 41.82753132412285 \n", "10317.000030579 41.82753132412285 \n", "\n", " Percent_of_calories_from_Monounsaturated_Fat \\\n", "#SampleID \n", "10317.000072291 20.95621331900778 \n", "10317.000107687 22.489476549272094 \n", "10317.000087663 21.854142563950166 \n", "10317.000030578 21.020284676343078 \n", "10317.000030579 21.020284676343078 \n", "\n", " Percent_of_calories_from_Polyunsaturated_Fat \\\n", "#SampleID \n", "10317.000072291 12.559912015883315 \n", "10317.000107687 8.219601812309241 \n", "10317.000087663 10.351928530673 \n", "10317.000030578 8.6645383097221 \n", "10317.000030579 8.6645383097221 \n", "\n", " Percent_of_calories_from_Protein \\\n", "#SampleID \n", "10317.000072291 22.881624780125065 \n", "10317.000107687 20.77926653960884 \n", "10317.000087663 19.42362991549346 \n", "10317.000030578 12.109788919375113 \n", "10317.000030579 12.109788919375113 \n", "\n", " Percent_of_calories_from_Saturated_Fat \n", "#SampleID \n", "10317.000072291 10.631030850565056 \n", "10317.000107687 22.733751192146702 \n", "10317.000087663 9.169527931007428 \n", "10317.000030578 11.298838478082244 \n", "10317.000030579 11.298838478082244 " ] }, "execution_count": 357, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_percents_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Formatting micro and macro nutrients\n", "\n", "NOTE: headers are augmented to include the unit of measurement (e.g., grams)." ] }, { "cell_type": "code", "execution_count": 358, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountcodedescriptionsurvey_idunitsvalueTypedescription_augmented
080.4870014190674acesupotAcesulfame Potassium80043f5209506497mgAmountAcesulfame Potassium_in_mg
123.9946937142986addsugarAdded Sugars (by Available Carbohydrate)80043f5209506497gAmountAdded Sugars (by Available Carbohydrate)_in_g
222.221421156844adsugtotAdded Sugars (by Total Sugars)80043f5209506497gAmountAdded Sugars (by Total Sugars)_in_g
32.82355429149195alanineAlanine80043f5209506497gAmountAlanine_in_g
40.0alcoholAlcohol80043f5209506497gAmountAlcohol_in_g
\n", "
" ], "text/plain": [ " amount code description \\\n", "0 80.4870014190674 acesupot Acesulfame Potassium \n", "1 23.9946937142986 addsugar Added Sugars (by Available Carbohydrate) \n", "2 22.221421156844 adsugtot Added Sugars (by Total Sugars) \n", "3 2.82355429149195 alanine Alanine \n", "4 0.0 alcohol Alcohol \n", "\n", " survey_id units valueType \\\n", "0 80043f5209506497 mg Amount \n", "1 80043f5209506497 g Amount \n", "2 80043f5209506497 g Amount \n", "3 80043f5209506497 g Amount \n", "4 80043f5209506497 g Amount \n", "\n", " description_augmented \n", "0 Acesulfame Potassium_in_mg \n", "1 Added Sugars (by Available Carbohydrate)_in_g \n", "2 Added Sugars (by Total Sugars)_in_g \n", "3 Alanine_in_g \n", "4 Alcohol_in_g " ] }, "execution_count": 358, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_micromacro.head()" ] }, { "cell_type": "code", "execution_count": 359, "metadata": {}, "outputs": [], "source": [ "vios_coded_micromacro = vios_coded_micromacro.copy()\n", "vios_coded_micromacro['description_augmented'] = [\"%s_in_%s\" % (desc, u)\n", " for desc, u in zip(vios_coded_micromacro['description'],\n", " vios_coded_micromacro['units'])]\n", "vios_coded_micromacro_pivot = vios_coded_micromacro.pivot(index='survey_id', \n", " columns='description_augmented', \n", " values='amount')\n", "vios_coded_micromacro_pivot = remap_to_sample_id(sanitize_columns(vios_coded_micromacro_pivot))\n", "vios_coded_micromacro_pivot.to_csv('vioscreen_micromacro.tsv', sep='\\t', index=True, header=True)" ] }, { "cell_type": "code", "execution_count": 360, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survey_id3_Methylhistidine_in_mgAcesulfame_Potassium_in_mgAdded_Sugars__by_Available_Carbohydrate__in_gAdded_Sugars__by_Total_Sugars__in_gAlanine_in_gAlcohol_in_gAlpha_Carotene__provitamin_A_carotenoid__in_mcgAlpha_Tocopherol_in_mgAnimal_Protein_in_g...Vitamin_D__calciferol__in_mcgVitamin_D2__ergocalciferol__in_mcgVitamin_D3__cholecalciferol__in_mcgVitamin_D_in_IUVitamin_E_in_IUVitamin_K__phylloquinone__in_mcgWater_in_gWhole_Grains__ounce_equivalents__in_oz_eqXylitol_in_gZinc_in_mg
#SampleID
10317.0000722910000904d9779a86c27.17473738144520.013.456263581612612.41200581992815.5148913669893812.33493100564071138.3693367724228.570242880412265.1852377337893...12.47429576975320.20021671186877412.2740790684938498.97183079012750.9459459997824224.1948876512322149.699640331540.6176449165267570.036749158714310612.8837308617062
10317.000107687000fc7ac5777657928.29629628690960.012.525881662025811.64839549383076.428278789207080.03101.3664124423315.173522354148994.5589348006414...8.93460039492310.08.9346003949231357.38401579692422.6390968368538931.2493543376163629.7977128350.4313863179455060.021155877459083314.6265771027999
10317.0000876630012cf765bb386ea18.27126494744050.020.729648963081715.87326049762363.500579259061380.000514735905194579565.52563323805915.83622021447839.3703097824671...2.221692123084290.02.2216921230842988.867684923371723.6360169491142162.853881004454291.305693414810.00.023628003484923610.289681270179
10317.000030578001b4c3e1c848f582.606841698043970.025.437240471091220.47622005868272.1478857560624917.79444611908332840.6904411129811.345112933610816.6596165030072...3.581943928264411.319241993027522.26270193523689143.27775713057616.909548815799201.2260175150733558.758279353171.694062498660940.03424382085021278.10011550652904
10317.000030579001b4c3e1c848f582.606841698043970.025.437240471091220.47622005868272.1478857560624917.79444611908332840.6904411129811.345112933610816.6596165030072...3.581943928264411.319241993027522.26270193523689143.27775713057616.909548815799201.2260175150733558.758279353171.694062498660940.03424382085021278.10011550652904
\n", "

5 rows × 194 columns

\n", "
" ], "text/plain": [ " survey_id 3_Methylhistidine_in_mg \\\n", "#SampleID \n", "10317.000072291 0000904d9779a86c 27.1747373814452 \n", "10317.000107687 000fc7ac57776579 28.2962962869096 \n", "10317.000087663 0012cf765bb386ea 18.2712649474405 \n", "10317.000030578 001b4c3e1c848f58 2.60684169804397 \n", "10317.000030579 001b4c3e1c848f58 2.60684169804397 \n", "\n", " Acesulfame_Potassium_in_mg \\\n", "#SampleID \n", "10317.000072291 0.0 \n", "10317.000107687 0.0 \n", "10317.000087663 0.0 \n", "10317.000030578 0.0 \n", "10317.000030579 0.0 \n", "\n", " Added_Sugars__by_Available_Carbohydrate__in_g \\\n", "#SampleID \n", "10317.000072291 13.4562635816126 \n", "10317.000107687 12.5258816620258 \n", "10317.000087663 20.7296489630817 \n", "10317.000030578 25.4372404710912 \n", "10317.000030579 25.4372404710912 \n", "\n", " Added_Sugars__by_Total_Sugars__in_g Alanine_in_g \\\n", "#SampleID \n", "10317.000072291 12.4120058199281 5.51489136698938 \n", "10317.000107687 11.6483954938307 6.42827878920708 \n", "10317.000087663 15.8732604976236 3.50057925906138 \n", "10317.000030578 20.4762200586827 2.14788575606249 \n", "10317.000030579 20.4762200586827 2.14788575606249 \n", "\n", " Alcohol_in_g \\\n", "#SampleID \n", "10317.000072291 12.3349310056407 \n", "10317.000107687 0.0 \n", "10317.000087663 0.000514735905194579 \n", "10317.000030578 17.7944461190833 \n", "10317.000030579 17.7944461190833 \n", "\n", " Alpha_Carotene__provitamin_A_carotenoid__in_mcg \\\n", "#SampleID \n", "10317.000072291 1138.36933677242 \n", "10317.000107687 3101.36641244233 \n", "10317.000087663 565.525633238059 \n", "10317.000030578 2840.69044111298 \n", "10317.000030579 2840.69044111298 \n", "\n", " Alpha_Tocopherol_in_mg Animal_Protein_in_g ... \\\n", "#SampleID ... \n", "10317.000072291 28.5702428804122 65.1852377337893 ... \n", "10317.000107687 15.1735223541489 94.5589348006414 ... \n", "10317.000087663 15.836220214478 39.3703097824671 ... \n", "10317.000030578 11.3451129336108 16.6596165030072 ... \n", "10317.000030579 11.3451129336108 16.6596165030072 ... \n", "\n", " Vitamin_D__calciferol__in_mcg \\\n", "#SampleID \n", "10317.000072291 12.4742957697532 \n", "10317.000107687 8.9346003949231 \n", "10317.000087663 2.22169212308429 \n", "10317.000030578 3.58194392826441 \n", "10317.000030579 3.58194392826441 \n", "\n", " Vitamin_D2__ergocalciferol__in_mcg \\\n", "#SampleID \n", "10317.000072291 0.200216711868774 \n", "10317.000107687 0.0 \n", "10317.000087663 0.0 \n", "10317.000030578 1.31924199302752 \n", "10317.000030579 1.31924199302752 \n", "\n", " Vitamin_D3__cholecalciferol__in_mcg Vitamin_D_in_IU \\\n", "#SampleID \n", "10317.000072291 12.2740790684938 498.971830790127 \n", "10317.000107687 8.9346003949231 357.384015796924 \n", "10317.000087663 2.22169212308429 88.8676849233717 \n", "10317.000030578 2.26270193523689 143.277757130576 \n", "10317.000030579 2.26270193523689 143.277757130576 \n", "\n", " Vitamin_E_in_IU Vitamin_K__phylloquinone__in_mcg \\\n", "#SampleID \n", "10317.000072291 50.9459459997824 224.194887651232 \n", "10317.000107687 22.6390968368538 931.249354337616 \n", "10317.000087663 23.6360169491142 162.85388100445 \n", "10317.000030578 16.909548815799 201.226017515073 \n", "10317.000030579 16.909548815799 201.226017515073 \n", "\n", " Water_in_g Whole_Grains__ounce_equivalents__in_oz_eq \\\n", "#SampleID \n", "10317.000072291 2149.69964033154 0.617644916526757 \n", "10317.000107687 3629.797712835 0.431386317945506 \n", "10317.000087663 4291.30569341481 0.0 \n", "10317.000030578 3558.75827935317 1.69406249866094 \n", "10317.000030579 3558.75827935317 1.69406249866094 \n", "\n", " Xylitol_in_g Zinc_in_mg \n", "#SampleID \n", "10317.000072291 0.0367491587143106 12.8837308617062 \n", "10317.000107687 0.0211558774590833 14.6265771027999 \n", "10317.000087663 0.0236280034849236 10.289681270179 \n", "10317.000030578 0.0342438208502127 8.10011550652904 \n", "10317.000030579 0.0342438208502127 8.10011550652904 \n", "\n", "[5 rows x 194 columns]" ] }, "execution_count": 360, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_micromacro_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Formatting eating patterns\n", "\n", "NOTE: headers are augmented to include the time unit (e.g., per day)." ] }, { "cell_type": "code", "execution_count": 361, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountcodedescriptionsurvey_idunitsvalueTypecode_augmented
2016.83359985351562ADDEDFATSEating Pattern80043f5209506497PerDayAmountADDEDFATS_PerDay
2020.0ALCOHOLSERVEating Pattern80043f5209506497PerDayAmountALCOHOLSERV_PerDay
2031.24581536668154ANIMALPROTEINEating Pattern80043f5209506497PerDayAmountANIMALPROTEIN_PerDay
2041.8388000404998CALCDAIRYSERVEating Pattern80043f5209506497PerDayAmountCALCDAIRYSERV_PerDay
2053.4041772177448CALCSERVEating Pattern80043f5209506497PerDayAmountCALCSERV_PerDay
\n", "
" ], "text/plain": [ " amount code description survey_id \\\n", "201 6.83359985351562 ADDEDFATS Eating Pattern 80043f5209506497 \n", "202 0.0 ALCOHOLSERV Eating Pattern 80043f5209506497 \n", "203 1.24581536668154 ANIMALPROTEIN Eating Pattern 80043f5209506497 \n", "204 1.8388000404998 CALCDAIRYSERV Eating Pattern 80043f5209506497 \n", "205 3.4041772177448 CALCSERV Eating Pattern 80043f5209506497 \n", "\n", " units valueType code_augmented \n", "201 PerDay Amount ADDEDFATS_PerDay \n", "202 PerDay Amount ALCOHOLSERV_PerDay \n", "203 PerDay Amount ANIMALPROTEIN_PerDay \n", "204 PerDay Amount CALCDAIRYSERV_PerDay \n", "205 PerDay Amount CALCSERV_PerDay " ] }, "execution_count": 361, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_eatingpattern.head()" ] }, { "cell_type": "code", "execution_count": 362, "metadata": {}, "outputs": [], "source": [ "vios_coded_eatingpattern = vios_coded_eatingpattern.copy()\n", "vios_coded_eatingpattern['code_augmented'] = ['%s_%s' % (c, u)\n", " for c, u in zip(vios_coded_eatingpattern['code'],\n", " vios_coded_eatingpattern['units'])]\n", "vios_coded_eatingpattern_pivot = vios_coded_eatingpattern.pivot(index='survey_id', \n", " columns='code_augmented', \n", " values='amount')\n", "vios_coded_eatingpattern_pivot = remap_to_sample_id(sanitize_columns(vios_coded_eatingpattern_pivot))\n", "vios_coded_eatingpattern_pivot.to_csv('vioscreen_eatingpatterns.tsv', sep='\\t', index=True, header=True)" ] }, { "cell_type": "code", "execution_count": 363, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survey_idADDEDFATS_PerDayALCOHOLSERV_PerDayANIMALPROTEIN_PerDayCALCDAIRYSERV_PerDayCALCSERV_PerDayFISHSERV_PerWeekFRIEDFISH_PerWeekFRTSUMM_PerDayGRAINSERV_PerDayJUICESERV_PerDayLOWFATDAIRYSERV_PerDayNOFRYFISHSERV_PerWeekNONFATDAIRY_PerDayPLANTPROTEIN_PerDaySALADSERV_PerDaySOYFOODS_PerDayVEGSUMM_PerDay
#SampleID
10317.0000722910000904d9779a86c13.71113258070850.8119846304802053.120116609696540.5889682752384222.439300483646620.7571846816637740.03.96134781411070.2036136602101781.568462268150020.106806574618980.7571846816637740.00.7882059979520434.279833872024330.0193534520186796.78067953556929
10317.000107687000fc7ac5777657916.95565679455460.05.550310219244011.027100449456192.855509948536880.4295147307931560.01.092138536478560.5471907262932760.00.3738230111664290.4295147307931560.00.01.057644700024230.08.40219293535005
10317.0000876630012cf765bb386ea10.4601757045610.01.755568210081730.1432416572942311.79335029325020.00.03.278008957595040.00.00.04121947288513180.00.00.09614747844330256.610060040918120.2023950070142758.88691729763619
10317.000030578001b4c3e1c848f589.48775220152451.183595540671031.358986436317580.5089651860398792.376772462010770.03303959467639660.01.459984737910231.765163032813590.00.106806574618980.03303959467639660.00.7120043972991922.130618594280660.4509744948930125.41913946629678
10317.000030579001b4c3e1c848f589.48775220152451.183595540671031.358986436317580.5089651860398792.376772462010770.03303959467639660.01.459984737910231.765163032813590.00.106806574618980.03303959467639660.00.7120043972991922.130618594280660.4509744948930125.41913946629678
\n", "
" ], "text/plain": [ " survey_id ADDEDFATS_PerDay ALCOHOLSERV_PerDay \\\n", "#SampleID \n", "10317.000072291 0000904d9779a86c 13.7111325807085 0.811984630480205 \n", "10317.000107687 000fc7ac57776579 16.9556567945546 0.0 \n", "10317.000087663 0012cf765bb386ea 10.460175704561 0.0 \n", "10317.000030578 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n", "10317.000030579 001b4c3e1c848f58 9.4877522015245 1.18359554067103 \n", "\n", " ANIMALPROTEIN_PerDay CALCDAIRYSERV_PerDay CALCSERV_PerDay \\\n", "#SampleID \n", "10317.000072291 3.12011660969654 0.588968275238422 2.43930048364662 \n", "10317.000107687 5.55031021924401 1.02710044945619 2.85550994853688 \n", "10317.000087663 1.75556821008173 0.143241657294231 1.7933502932502 \n", "10317.000030578 1.35898643631758 0.508965186039879 2.37677246201077 \n", "10317.000030579 1.35898643631758 0.508965186039879 2.37677246201077 \n", "\n", " FISHSERV_PerWeek FRIEDFISH_PerWeek FRTSUMM_PerDay \\\n", "#SampleID \n", "10317.000072291 0.757184681663774 0.0 3.9613478141107 \n", "10317.000107687 0.429514730793156 0.0 1.09213853647856 \n", "10317.000087663 0.0 0.0 3.27800895759504 \n", "10317.000030578 0.0330395946763966 0.0 1.45998473791023 \n", "10317.000030579 0.0330395946763966 0.0 1.45998473791023 \n", "\n", " GRAINSERV_PerDay JUICESERV_PerDay LOWFATDAIRYSERV_PerDay \\\n", "#SampleID \n", "10317.000072291 0.203613660210178 1.56846226815002 0.10680657461898 \n", "10317.000107687 0.547190726293276 0.0 0.373823011166429 \n", "10317.000087663 0.0 0.0 0.0412194728851318 \n", "10317.000030578 1.76516303281359 0.0 0.10680657461898 \n", "10317.000030579 1.76516303281359 0.0 0.10680657461898 \n", "\n", " NOFRYFISHSERV_PerWeek NONFATDAIRY_PerDay PLANTPROTEIN_PerDay \\\n", "#SampleID \n", "10317.000072291 0.757184681663774 0.0 0.788205997952043 \n", "10317.000107687 0.429514730793156 0.0 0.0 \n", "10317.000087663 0.0 0.0 0.0961474784433025 \n", "10317.000030578 0.0330395946763966 0.0 0.712004397299192 \n", "10317.000030579 0.0330395946763966 0.0 0.712004397299192 \n", "\n", " SALADSERV_PerDay SOYFOODS_PerDay VEGSUMM_PerDay \n", "#SampleID \n", "10317.000072291 4.27983387202433 0.019353452018679 6.78067953556929 \n", "10317.000107687 1.05764470002423 0.0 8.40219293535005 \n", "10317.000087663 6.61006004091812 0.202395007014275 8.88691729763619 \n", "10317.000030578 2.13061859428066 0.450974494893012 5.41913946629678 \n", "10317.000030579 2.13061859428066 0.450974494893012 5.41913946629678 " ] }, "execution_count": 363, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_eatingpattern_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Formatting vioscreen scores\n", "\n", "NOTE: a \"bounds\" file is also produced here as vioscreen provides upper and lower limits." ] }, { "cell_type": "code", "execution_count": 364, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lowerLimitnamescoresurvey_idtypeupperLimit
2270.0Total Vegetables2.388009959526710480043f5209506497TotalVegetables5.0
2280.0Greens and Beans0.080043f5209506497GreensAndBeans5.0
2290.0Total Fruit5.080043f5209506497TotalFruit5.0
2300.0Whole Fruit5.080043f5209506497WholeFruit5.0
2310.0Whole Grains5.681024214331106580043f5209506497WholeGrains10.0
\n", "
" ], "text/plain": [ " lowerLimit name score survey_id \\\n", "227 0.0 Total Vegetables 2.3880099595267104 80043f5209506497 \n", "228 0.0 Greens and Beans 0.0 80043f5209506497 \n", "229 0.0 Total Fruit 5.0 80043f5209506497 \n", "230 0.0 Whole Fruit 5.0 80043f5209506497 \n", "231 0.0 Whole Grains 5.6810242143311065 80043f5209506497 \n", "\n", " type upperLimit \n", "227 TotalVegetables 5.0 \n", "228 GreensAndBeans 5.0 \n", "229 TotalFruit 5.0 \n", "230 WholeFruit 5.0 \n", "231 WholeGrains 10.0 " ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_no_data.head()" ] }, { "cell_type": "code", "execution_count": 366, "metadata": {}, "outputs": [], "source": [ "vios_no_data_pivot = vios_no_data.pivot(index='survey_id', columns='type', values='score')\n", "vios_no_data_pivot = remap_to_sample_id(sanitize_columns(vios_no_data_pivot))\n", "vios_no_data_pivot.to_csv('vioscreen_scores.tsv', sep='\\t', index=True, header=True)\n", "\n", "vios_no_data_pivot_desc = vios_no_data[['type', 'lowerLimit', 'upperLimit']]\n", "vios_no_data_pivot_desc = vios_no_data_pivot_desc[~vios_no_data_pivot_desc.duplicated()]\n", "vios_no_data_pivot_desc = sanitize_columns(vios_no_data_pivot_desc)\n", "vios_no_data_pivot_desc.to_csv('vioscreen_scores_bounds.tsv', sep='\\t', index=False, header=True)" ] }, { "cell_type": "code", "execution_count": 367, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survey_idDairyEmptyCaloriesFattyAcidsGreensAndBeansRefinedGrainsSeafoodAndPlantProteinsSodiumTotalFruitTotalProteinsTotalScoreTotalVegetablesWholeFruitWholeGrains
#SampleID
10317.0000722910000904d9779a86c2.77391325185297820.010.05.010.05.02.91163951441079855.05.076.420114951928925.05.00.7345621856651521
10317.000107687000fc7ac577765793.449436843682274710.4146845873412991.09455274106328585.010.05.00.01.49006901557428155.051.007095713194655.02.9656814233476521.5926711021858588
10317.0000876630012cf765bb386ea0.765759144726192920.010.05.010.05.00.05.05.070.765759144726195.05.00.0
10317.000030578001b4c3e1c848f583.74401950439962320.010.05.010.05.00.02.89275857563038935.079.097956350140565.05.07.461178270110556
10317.000030579001b4c3e1c848f583.74401950439962320.010.05.010.05.00.02.89275857563038935.079.097956350140565.05.07.461178270110556
\n", "
" ], "text/plain": [ " survey_id Dairy EmptyCalories \\\n", "#SampleID \n", "10317.000072291 0000904d9779a86c 2.773913251852978 20.0 \n", "10317.000107687 000fc7ac57776579 3.4494368436822747 10.414684587341299 \n", "10317.000087663 0012cf765bb386ea 0.7657591447261929 20.0 \n", "10317.000030578 001b4c3e1c848f58 3.744019504399623 20.0 \n", "10317.000030579 001b4c3e1c848f58 3.744019504399623 20.0 \n", "\n", " FattyAcids GreensAndBeans RefinedGrains \\\n", "#SampleID \n", "10317.000072291 10.0 5.0 10.0 \n", "10317.000107687 1.0945527410632858 5.0 10.0 \n", "10317.000087663 10.0 5.0 10.0 \n", "10317.000030578 10.0 5.0 10.0 \n", "10317.000030579 10.0 5.0 10.0 \n", "\n", " SeafoodAndPlantProteins Sodium \\\n", "#SampleID \n", "10317.000072291 5.0 2.9116395144107985 \n", "10317.000107687 5.0 0.0 \n", "10317.000087663 5.0 0.0 \n", "10317.000030578 5.0 0.0 \n", "10317.000030579 5.0 0.0 \n", "\n", " TotalFruit TotalProteins TotalScore \\\n", "#SampleID \n", "10317.000072291 5.0 5.0 76.42011495192892 \n", "10317.000107687 1.4900690155742815 5.0 51.00709571319465 \n", "10317.000087663 5.0 5.0 70.76575914472619 \n", "10317.000030578 2.8927585756303893 5.0 79.09795635014056 \n", "10317.000030579 2.8927585756303893 5.0 79.09795635014056 \n", "\n", " TotalVegetables WholeFruit WholeGrains \n", "#SampleID \n", "10317.000072291 5.0 5.0 0.7345621856651521 \n", "10317.000107687 5.0 2.965681423347652 1.5926711021858588 \n", "10317.000087663 5.0 5.0 0.0 \n", "10317.000030578 5.0 5.0 7.461178270110556 \n", "10317.000030579 5.0 5.0 7.461178270110556 " ] }, "execution_count": 367, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_no_data_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Format frequency information on the foods consumed\n", "\n", "NOTE: two entries are unexpectedly duplicated and are removed." ] }, { "cell_type": "code", "execution_count": 368, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountconsumptionAdjustmentcreateddatadescriptionfoodCodefoodGroupfrequencyservingFrequencyTextservingSizeTextsurvey_id
2183.01.02017-07-29T06:56:24.553[{'code': 'acesupot', 'description': 'Acesulfa...Enchiladas and tamales40076Asian, Mexican and Soy Foods286.05-6 per week3 enchiladas or tamales80043f5209506497
2192.01.02017-07-29T06:56:24.553[{'code': 'acesupot', 'description': 'Acesulfa...Fresh garlic, including in cooking30043Sauces and Seasonings365.01 per day2 cloves80043f5209506497
2201.01.02017-07-29T06:56:24.553[{'code': 'acesupot', 'description': 'Acesulfa...Lard, bacon fat or meat drippings (Fat used in...10005Oil or Fat Used in Cooking1460.04 per day2 teaspoons80043f5209506497
2211.51.02017-07-29T06:56:24.553[{'code': 'acesupot', 'description': 'Acesulfa...Meal replacement drinks and shakes such as Sli...90015Meal Replacement Drinks, Sports and Granola Bars365.01 per day1 1/2 cups (12 oz)80043f5209506497
2221.01.02017-07-29T06:56:24.553[{'code': 'acesupot', 'description': 'Acesulfa...Other candy, such as Lifesavers, licorice and ...80015Sweets104.02 per week4 pieces hard candy, 10 Life Savers<SUP>&reg;<...80043f5209506497
\n", "
" ], "text/plain": [ " amount consumptionAdjustment created \\\n", "218 3.0 1.0 2017-07-29T06:56:24.553 \n", "219 2.0 1.0 2017-07-29T06:56:24.553 \n", "220 1.0 1.0 2017-07-29T06:56:24.553 \n", "221 1.5 1.0 2017-07-29T06:56:24.553 \n", "222 1.0 1.0 2017-07-29T06:56:24.553 \n", "\n", " data \\\n", "218 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "219 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "220 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "221 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "222 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "\n", " description foodCode \\\n", "218 Enchiladas and tamales 40076 \n", "219 Fresh garlic, including in cooking 30043 \n", "220 Lard, bacon fat or meat drippings (Fat used in... 10005 \n", "221 Meal replacement drinks and shakes such as Sli... 90015 \n", "222 Other candy, such as Lifesavers, licorice and ... 80015 \n", "\n", " foodGroup frequency \\\n", "218 Asian, Mexican and Soy Foods 286.0 \n", "219 Sauces and Seasonings 365.0 \n", "220 Oil or Fat Used in Cooking 1460.0 \n", "221 Meal Replacement Drinks, Sports and Granola Bars 365.0 \n", "222 Sweets 104.0 \n", "\n", " servingFrequencyText servingSizeText \\\n", "218 5-6 per week 3 enchiladas or tamales \n", "219 1 per day 2 cloves \n", "220 4 per day 2 teaspoons \n", "221 1 per day 1 1/2 cups (12 oz) \n", "222 2 per week 4 pieces hard candy, 10 Life Savers®<... \n", "\n", " survey_id \n", "218 80043f5209506497 \n", "219 80043f5209506497 \n", "220 80043f5209506497 \n", "221 80043f5209506497 \n", "222 80043f5209506497 " ] }, "execution_count": 368, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_with_data.head()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "847b5821f05f211f\n", "d95b890fe63c21d0\n" ] } ], "source": [ "duplicated = []\n", "for i, grp in vios_with_data.groupby('survey_id'):\n", " if len(grp.description) != len(grp.description.unique()):\n", " print(i)\n", " for d, dup in grp.groupby('description'):\n", " if len(dup) > 1:\n", " duplicated.append(dup)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountconsumptionAdjustmentcreateddatadescriptionfoodCodefoodGroupfrequencyservingFrequencyTextservingSizeTextsurvey_id
10162570.51.02018-05-09T15:52:12.827[{'code': 'acesupot', 'description': 'Acesulfa...Soy milk, not fortified (Milk on cooked cereal)120561Cereals and Breads52.01 per week1/2 cup (4 oz)847b5821f05f211f
10162590.6660.52018-05-09T15:52:12.827[{'code': 'acesupot', 'description': 'Acesulfa...Soy milk, not fortified (Milk on cooked cereal)120561Cereals and Breads52.01 per week2/3 cup847b5821f05f211f
\n", "
" ], "text/plain": [ " amount consumptionAdjustment created \\\n", "1016257 0.5 1.0 2018-05-09T15:52:12.827 \n", "1016259 0.666 0.5 2018-05-09T15:52:12.827 \n", "\n", " data \\\n", "1016257 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "1016259 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "\n", " description foodCode \\\n", "1016257 Soy milk, not fortified (Milk on cooked cereal) 120561 \n", "1016259 Soy milk, not fortified (Milk on cooked cereal) 120561 \n", "\n", " foodGroup frequency servingFrequencyText servingSizeText \\\n", "1016257 Cereals and Breads 52.0 1 per week 1/2 cup (4 oz) \n", "1016259 Cereals and Breads 52.0 1 per week 2/3 cup \n", "\n", " survey_id \n", "1016257 847b5821f05f211f \n", "1016259 847b5821f05f211f " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "duplicated[0]" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountconsumptionAdjustmentcreateddatadescriptionfoodCodefoodGroupfrequencyservingFrequencyTextservingSizeTextsurvey_id
4594880.51.02017-07-29T02:28:12.49[{'code': 'acesupot', 'description': 'Acesulfa...Soy milk, not fortified (Milk on cooked cereal)120561Cereals and Breads104.02-6 per week1/2 cup (4 oz)d95b890fe63c21d0
4594890.6661.02017-07-29T02:28:12.49[{'code': 'acesupot', 'description': 'Acesulfa...Soy milk, not fortified (Milk on cooked cereal)120561Cereals and Breads104.02-6 per week2/3 cupd95b890fe63c21d0
\n", "
" ], "text/plain": [ " amount consumptionAdjustment created \\\n", "459488 0.5 1.0 2017-07-29T02:28:12.49 \n", "459489 0.666 1.0 2017-07-29T02:28:12.49 \n", "\n", " data \\\n", "459488 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "459489 [{'code': 'acesupot', 'description': 'Acesulfa... \n", "\n", " description foodCode \\\n", "459488 Soy milk, not fortified (Milk on cooked cereal) 120561 \n", "459489 Soy milk, not fortified (Milk on cooked cereal) 120561 \n", "\n", " foodGroup frequency servingFrequencyText servingSizeText \\\n", "459488 Cereals and Breads 104.0 2-6 per week 1/2 cup (4 oz) \n", "459489 Cereals and Breads 104.0 2-6 per week 2/3 cup \n", "\n", " survey_id \n", "459488 d95b890fe63c21d0 \n", "459489 d95b890fe63c21d0 " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "duplicated[1]" ] }, { "cell_type": "code", "execution_count": 369, "metadata": {}, "outputs": [], "source": [ "# manually ignoring the duplicated index values above,\n", "# otherwise would need to filter by grouped survey ids and that seems\n", "# excessive for such a small impact (and likely bug on vioscreens side)\n", "vios_with_data_nodup = vios_with_data.loc[[i for i in vios_with_data.index if i not in [1016259, 459489]]]\n", "\n", "# \"frequency\" is almost certainly the number of instances estimated per year.\n", "# For example, when someone consumes tea once per day, the frequency is 365 whereas\n", "# once per week it is 52. \n", "vios_with_data_nodup_pivot = vios_with_data_nodup.pivot(index='survey_id', \n", " columns='description', \n", " values='frequency').fillna(0.0)\n", "\n", "vios_with_data_nodup_pivot = remap_to_sample_id(sanitize_columns(vios_with_data_nodup_pivot))\n", "vios_with_data_nodup_pivot.to_csv('vioscreen_foods_consumed_frequency_units_per_year.tsv', sep='\\t',\n", " index=True, header=True)" ] }, { "cell_type": "code", "execution_count": 370, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survey_idAll_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meatAll_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cookingAll_other_fruitsAll_other_lunch_meat_such_as_bologna__salami_and_SpamAll_teaApples__applesauce_and_pearsApricots___driedApricots___fresh_or_cannedAsian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai...White_RiceWhite_breads__including_bagels__rolls_and_English_muffinsWhite_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_codWhite_or_rosé_wineWhole_grain_breads__including_bagels_and_rollsWhole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_Whole_kernel_grains_such_as_brown_riceWinter_squash_such_as_acorn__butternut_and_pumpkinYams_and_sweet_potatoesYogurt__all_types_except_frozen
#SampleID
10317.0000722910000904d9779a86c0182.052.00052.0182.000...0052.052.0028.028.052.0052.0
10317.000107687000fc7ac577765790104.0001642.052.0000...182.00104.000104.0000182.0
10317.0000876630012cf765bb386ea52.0000052.0000...0000000000
10317.000030578001b4c3e1c848f58182.0104.000912.0182.0000...182.0012.024.0182.0182.0028.0052.0
10317.000030579001b4c3e1c848f58182.0104.000912.0182.0000...182.0012.024.0182.0182.0028.0052.0
\n", "

5 rows × 260 columns

\n", "
" ], "text/plain": [ " survey_id \\\n", "#SampleID \n", "10317.000072291 0000904d9779a86c \n", "10317.000107687 000fc7ac57776579 \n", "10317.000087663 0012cf765bb386ea \n", "10317.000030578 001b4c3e1c848f58 \n", "10317.000030579 001b4c3e1c848f58 \n", "\n", " All_other_beans_such_as_baked_beans__lima_beans_and_chili_without_meat \\\n", "#SampleID \n", "10317.000072291 0 \n", "10317.000107687 0 \n", "10317.000087663 52.0 \n", "10317.000030578 182.0 \n", "10317.000030579 182.0 \n", "\n", " All_other_cheese__such_as_American__cheddar_or_cream_cheese__including_cheese_used_in_cooking \\\n", "#SampleID \n", "10317.000072291 182.0 \n", "10317.000107687 104.0 \n", "10317.000087663 0 \n", "10317.000030578 104.0 \n", "10317.000030579 104.0 \n", "\n", " All_other_fruits \\\n", "#SampleID \n", "10317.000072291 52.0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 0 \n", "10317.000030579 0 \n", "\n", " All_other_lunch_meat_such_as_bologna__salami_and_Spam All_tea \\\n", "#SampleID \n", "10317.000072291 0 0 \n", "10317.000107687 0 1642.0 \n", "10317.000087663 0 0 \n", "10317.000030578 0 912.0 \n", "10317.000030579 0 912.0 \n", "\n", " Apples__applesauce_and_pears Apricots___dried \\\n", "#SampleID \n", "10317.000072291 52.0 182.0 \n", "10317.000107687 52.0 0 \n", "10317.000087663 52.0 0 \n", "10317.000030578 182.0 0 \n", "10317.000030579 182.0 0 \n", "\n", " Apricots___fresh_or_canned \\\n", "#SampleID \n", "10317.000072291 0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 0 \n", "10317.000030579 0 \n", "\n", " Asian_style__stir_fried__noodles_and_rice__such_as_chow_mein__fried_rice_and_pad_Thai \\\n", "#SampleID \n", "10317.000072291 0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 0 \n", "10317.000030579 0 \n", "\n", " ... White_Rice \\\n", "#SampleID ... \n", "10317.000072291 ... 0 \n", "10317.000107687 ... 182.0 \n", "10317.000087663 ... 0 \n", "10317.000030578 ... 182.0 \n", "10317.000030579 ... 182.0 \n", "\n", " White_breads__including_bagels__rolls_and_English_muffins \\\n", "#SampleID \n", "10317.000072291 0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 0 \n", "10317.000030579 0 \n", "\n", " White_fish__broiled_or_baked__such_as_sole__halibut__snapper_and_cod \\\n", "#SampleID \n", "10317.000072291 52.0 \n", "10317.000107687 104.0 \n", "10317.000087663 0 \n", "10317.000030578 12.0 \n", "10317.000030579 12.0 \n", "\n", " White_or_rosé_wine \\\n", "#SampleID \n", "10317.000072291 52.0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 24.0 \n", "10317.000030579 24.0 \n", "\n", " Whole_grain_breads__including_bagels_and_rolls \\\n", "#SampleID \n", "10317.000072291 0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 182.0 \n", "10317.000030579 182.0 \n", "\n", " Whole_grain_breads__including_bagels_and_rolls__100__Whole_Grains_ \\\n", "#SampleID \n", "10317.000072291 28.0 \n", "10317.000107687 104.0 \n", "10317.000087663 0 \n", "10317.000030578 182.0 \n", "10317.000030579 182.0 \n", "\n", " Whole_kernel_grains_such_as_brown_rice \\\n", "#SampleID \n", "10317.000072291 28.0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 0 \n", "10317.000030579 0 \n", "\n", " Winter_squash_such_as_acorn__butternut_and_pumpkin \\\n", "#SampleID \n", "10317.000072291 52.0 \n", "10317.000107687 0 \n", "10317.000087663 0 \n", "10317.000030578 28.0 \n", "10317.000030579 28.0 \n", "\n", " Yams_and_sweet_potatoes Yogurt__all_types_except_frozen \n", "#SampleID \n", "10317.000072291 0 52.0 \n", "10317.000107687 0 182.0 \n", "10317.000087663 0 0 \n", "10317.000030578 0 52.0 \n", "10317.000030579 0 52.0 \n", "\n", "[5 rows x 260 columns]" ] }, "execution_count": 370, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_with_data_nodup_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Formatting raw food information\n", "\n", "This resulting DataFrame is massive at 39M rows at the time of creating this notebook. Each row represents a micro or macronutrients of each food in its corresponding serving size that the person consumed.\n", "\n", "Due to the size, this frame is written out compressed (gzip). We are also not remapping to sample ID here as it would replicate a lot of data in what is already a pretty verbose structure.\n", "\n", "**WARNING: the first cell will run for 5-10 minutes**" ] }, { "cell_type": "code", "execution_count": 239, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "434.7384750843048\n" ] } ], "source": [ "# an example of loading the data and what the contents look like.\n", "# The \"code\" values *should* correspond to the non-null code values\n", "# in the full raw file\n", "\n", "# WARNING: this will run for 5-10 minutes\n", "items = []\n", "parsed = {}\n", "start = time.time()\n", "for _, row in vios_with_data.iterrows(): \n", " # we're using a memoization pattern to minimize parsing JSON. \n", " # Of the ~250,000 rows in vios_with_data, only about 50,000 have a unique \n", " # \"data\" JSON object. Parsing JSON is relatively expensive and this memoization\n", " # shaves a pretty large amount of time off.\n", " datablock = row['data']\n", " if datablock in parsed:\n", " data_parsed = parsed[datablock].copy()\n", " else:\n", " data_parsed = pd.read_json(datablock.replace(\"'\", '\"'), dtype=False, \n", " convert_axes=False, convert_dates=False)\n", " \n", " parsed[datablock] = data_parsed\n", " \n", " # make sure relevant information about the survey etc are retained\n", " # some is probably extraneous\n", " for c in ['survey_id', \n", " 'description', \n", " 'frequency', \n", " 'servingFrequencyText', \n", " 'servingSizeText', \n", " 'foodCode']:\n", " data_parsed[c] = row[c]\n", " items.append(data_parsed)\n", "meal_data = pd.concat(items, ignore_index=True)\n", "meal_data.set_index('survey_id', inplace=True)\n", "print(time.time() - start) " ] }, { "cell_type": "code", "execution_count": 240, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountcodedescriptionunitsvalueTypefrequencyservingFrequencyTextservingSizeTextfoodCode
survey_id
80043f52095064970.000000acesupotEnchiladas and tamalesmgAmount286.05-6 per week3 enchiladas or tamales40076
80043f52095064970.000000addsugarEnchiladas and tamalesgAmount286.05-6 per week3 enchiladas or tamales40076
80043f52095064970.000000adsugtotEnchiladas and tamalesgAmount286.05-6 per week3 enchiladas or tamales40076
80043f52095064971.905583alanineEnchiladas and tamalesgAmount286.05-6 per week3 enchiladas or tamales40076
80043f52095064970.000000alcoholEnchiladas and tamalesgAmount286.05-6 per week3 enchiladas or tamales40076
\n", "
" ], "text/plain": [ " amount code description units valueType \\\n", "survey_id \n", "80043f5209506497 0.000000 acesupot Enchiladas and tamales mg Amount \n", "80043f5209506497 0.000000 addsugar Enchiladas and tamales g Amount \n", "80043f5209506497 0.000000 adsugtot Enchiladas and tamales g Amount \n", "80043f5209506497 1.905583 alanine Enchiladas and tamales g Amount \n", "80043f5209506497 0.000000 alcohol Enchiladas and tamales g Amount \n", "\n", " frequency servingFrequencyText servingSizeText \\\n", "survey_id \n", "80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n", "80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n", "80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n", "80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n", "80043f5209506497 286.0 5-6 per week 3 enchiladas or tamales \n", "\n", " foodCode \n", "survey_id \n", "80043f5209506497 40076 \n", "80043f5209506497 40076 \n", "80043f5209506497 40076 \n", "80043f5209506497 40076 \n", "80043f5209506497 40076 " ] }, "execution_count": 240, "metadata": {}, "output_type": "execute_result" } ], "source": [ "meal_data.head()" ] }, { "cell_type": "code", "execution_count": 245, "metadata": {}, "outputs": [], "source": [ "meal_data.to_csv('vioscreen_expanded_meal_data.tsv.gz', sep='\\t', index=True, header=True,\n", " compression='gzip')" ] }, { "cell_type": "code", "execution_count": 246, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(39486728, 9)" ] }, "execution_count": 246, "metadata": {}, "output_type": "execute_result" } ], "source": [ "meal_data.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Round trip test\n", "\n", "Note the codes in the meal data do not have a 1-1 correspondence with the micromacro frame. In that frame, we used the description field which is more human readable than the code.\n", "\n", "It also appears that some codes like GLAC are not obviously represented in the micromacro frame. Similarly, there are some entities like animal protein that do not appear in the meal data frame so that may be inferred by vioscreen elsewhere. " ] }, { "cell_type": "code", "execution_count": 372, "metadata": {}, "outputs": [], "source": [ "test_survey = '001b4c3e1c848f58'\n", "test = meal_data.loc[test_survey]" ] }, { "cell_type": "code", "execution_count": 378, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amountcodedescriptionunitsvalueTypefrequencyservingFrequencyTextservingSizeTextfoodCode
survey_id
001b4c3e1c848f580.000000GLACAll other cheese, such as American, cheddar or...unitIndex104.02 per week1 slice (1 oz), 1/4 cup shredded, 2 tablespoon...70005
001b4c3e1c848f581.910150GLACApples, applesauce and pearsunitIndex182.03-4 per week1/2 apple or pear, 1/4 cup20001
001b4c3e1c848f580.000000GLACAvocado and guacamoleunitIndex52.01 per week1/2 avocado, 1/2 cup30006
001b4c3e1c848f583.096284GLACBananasunitIndex182.03-4 per week1/2 banana20002
001b4c3e1c848f580.000000GLACBeef, pork, ham and lamb - with fatunitIndex28.02-3 per month3 slices, 2 chops, small steak (4-5 oz)40002
001b4c3e1c848f583.568904GLACBeer (all types)unitIndex52.01 day per week16 oz (1 pint)90002
001b4c3e1c848f580.000000GLACBroccoliunitIndex52.01 per week1/4 cup30012
001b4c3e1c848f581.437427GLACBurritos, tacos, tostadas and quesadillasunitIndex104.02 per week1 taco or tostada, 1 small burrito, 1 quesadilla40202
001b4c3e1c848f580.000000GLACButter (Cereals and Breads)unitIndex182.03-4 per week4 teaspoons (4 pats)10004
001b4c3e1c848f580.930636GLACCarrots - cookedunitIndex182.03-4 per week1/2 cup30041
001b4c3e1c848f580.164987GLACCarrots - rawunitIndex182.03-4 per week1/2 carrot, 4 baby carrots, 1/4 cup30040
001b4c3e1c848f580.000000GLACCauliflower, cabbage and Brussels sproutsunitIndex52.01 per week1/4 cup30018
001b4c3e1c848f581.960348GLACChocolate, candy bars, and toffeeunitIndex52.01 per week1 regular bar, 8-10 bite sized-pieces (1.5 oz)80008
001b4c3e1c848f580.000000GLACCoffee (not lattes or mochas)unitIndex912.02-3 per day1 small cup (6 oz)90008
001b4c3e1c848f580.000000GLACColeslawunitIndex52.01 per week3/4 cup30017
001b4c3e1c848f582.233787GLACCookies and cakes - regularunitIndex52.01 per week2 medium cookies, 1 large cookie, 1 small piec...80005
001b4c3e1c848f584.057757GLACWhole grain breads, including bagels and rollsunitIndex182.03-4 per week2 slices bread, 1 medium roll, 1 English muffi...120413
001b4c3e1c848f580.000000GLACEggsunitIndex182.03-4 per week2 eggs50004
001b4c3e1c848f580.000000GLACFresh garlic, including in cookingunitIndex104.02 per week1 clove30043
001b4c3e1c848f580.544489GLACFresh tomatoesunitIndex182.03-4 per week4 slices, 1 medium tomato30008
001b4c3e1c848f580.851028GLACGrapes, freshunitIndex52.01 per week8-10 grapes, 1/2 cup20024
001b4c3e1c848f580.000000GLACGreen or string beansunitIndex182.03-4 per week1/4 cup30001
001b4c3e1c848f580.000000GLACGreen peppers and green chilies, cookedunitIndex52.01 per week1/8 cup30054
001b4c3e1c848f580.000000GLACGreen salad (Lettuce or spinach)unitIndex286.05-6 per week1 cup (medium bowl)30021
001b4c3e1c848f580.711497GLACLatte (whole milk), mocha or hot chocolateunitIndex52.01 per week1 regular cup (8-10 oz)90007
001b4c3e1c848f580.000000GLACLunch meats such as ham, turkey and lowfat bol...unitIndex28.02-3 per month2 slices (2 oz)40028
001b4c3e1c848f581.636527GLACMuffins, scones, croissants and biscuitsunitIndex52.01 per weekSmall muffin, small croissant, small biscuit60028
001b4c3e1c848f580.000000GLACOil, canola (Fat used in cooking)unitIndex365.01 per day2 teaspoons10011
001b4c3e1c848f580.000000GLACOil, olive (Cereals and Breads)unitIndex182.03-4 per week4 teaspoons (4 pats)10010
001b4c3e1c848f580.000000GLACOil, other such as corn, soybean, safflower or...unitIndex365.01 per day2 teaspoons10007
..............................
001b4c3e1c848f580.000000GLACRed peppers and red chilies, cookedunitIndex182.03-4 per week1/8 cup30057
001b4c3e1c848f580.000000GLACRed peppers and red chilies, rawunitIndex52.01 per week1/4 pepper, 4-6 strips, 1/4 cup30056
001b4c3e1c848f580.000000GLACRed WineunitIndex104.02 days per week4 oz (about 1/8th bottle)90018
001b4c3e1c848f580.702932GLACRegular potato, tortilla chips, corn chips and...unitIndex28.02-3 per month2 handfuls, 1 small bag (1 oz)60031
001b4c3e1c848f580.801028GLACSpaghetti, lasagna and other pasta with tomato...unitIndex12.01 per month1 cup (medium bowl)40017
001b4c3e1c848f580.252476GLACStew, pot pie, curries and casseroles with mea...unitIndex12.01 per month1 cup (medium bowl)40004
001b4c3e1c848f580.000000GLACSummer squash and zucchiniunitIndex182.03-4 per week1/4 cup30046
001b4c3e1c848f580.000000GLACAll teaunitIndex912.02-3 per day1 large glass (12 oz)90009
001b4c3e1c848f581.538358GLACVegetable, minestrone and tomato soupunitIndex52.01 per week1 1/2 cups (large bowl)40033
001b4c3e1c848f580.000000GLACWater (tap or bottled)unitIndex1642.04-5 per day1 regular glass (8 oz)90017
001b4c3e1c848f580.000000GLACWhite fish (broiled or baked) such as sole, ha...unitIndex12.01 per month1 medium piece (4-5 oz)40073
001b4c3e1c848f580.000000GLACWhite or rosé wineunitIndex24.02-3 days per month6 oz (about 1/6th bottle)90019
001b4c3e1c848f580.665620GLACWinter squash such as acorn, butternut and pum...unitIndex28.02-3 per month3/4 cup30047
001b4c3e1c848f581.336993GLACYogurt, all types except frozenunitIndex52.01 per week3/4 (6 oz) (regular container)70017
001b4c3e1c848f580.000000GLACButter (Fat used in cooking)unitIndex365.01 per day2 teaspoons10004
001b4c3e1c848f580.000000GLACOil, olive (Fat used in cooking)unitIndex365.01 per day2 teaspoons10010
001b4c3e1c848f580.000000GLACOil, olive (Fats used on vegetables)unitIndex730.02 per day2 teaspoons10010
001b4c3e1c848f580.000000GLACSaltunitIndex1095.03 per day21-40 shakes (1/4 teaspoon)10100
001b4c3e1c848f580.000000GLACVeggie soy or tofu burgers or ground meat subs...unitIndex52.01 per week1 burger or 3 oz120191
001b4c3e1c848f580.000000GLACTofuunitIndex104.02 per week4-5 pieces (4 oz)120206
001b4c3e1c848f580.000000GLACTempehunitIndex24.01-3 per month3 pieces (3 oz)120207
001b4c3e1c848f580.000000GLACSoy sauce, tamari, teriyaki sauce, Szechwan sa...unitIndex52.01 per week2 teaspoons120198
001b4c3e1c848f581.391782GLACCorn tortillasunitIndex52.01 per week2 (6\" diameter)120123
001b4c3e1c848f587.016067GLACWhite RiceunitIndex182.03-4 per week1/2 cup120414
001b4c3e1c848f580.000000GLACCooked greens, such as spinach, swiss chard an...unitIndex52.01 per week3/4 cup120438
001b4c3e1c848f583.030604GLACWhole grain breads, including bagels and rolls...unitIndex182.03-4 per week2 slices bread, 1 medium roll, 1 English muffi...120412
001b4c3e1c848f585.485402GLACCooked whole grain cerealsunitIndex286.05-6 per week1/2 cup (small bowl)120410
001b4c3e1c848f583.923203GLACAll other beans such as baked beans, lima bean...unitIndex182.03-4 per week3/4 cup30044
001b4c3e1c848f581.430008GLACSoy milk, fortified (Milk in cooked cereal)unitIndex286.05-6 per week2/3 cup120560
001b4c3e1c848f584.556126GLACRice milk (Milk in cooked cereal)unitIndex286.05-6 per week2/3 cup70018
\n", "

64 rows × 9 columns

\n", "
" ], "text/plain": [ " amount code \\\n", "survey_id \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.910150 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 3.096284 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 3.568904 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.437427 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.930636 GLAC \n", "001b4c3e1c848f58 0.164987 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.960348 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 2.233787 GLAC \n", "001b4c3e1c848f58 4.057757 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.544489 GLAC \n", "001b4c3e1c848f58 0.851028 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.711497 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.636527 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "... ... ... \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.702932 GLAC \n", "001b4c3e1c848f58 0.801028 GLAC \n", "001b4c3e1c848f58 0.252476 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.538358 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.665620 GLAC \n", "001b4c3e1c848f58 1.336993 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 1.391782 GLAC \n", "001b4c3e1c848f58 7.016067 GLAC \n", "001b4c3e1c848f58 0.000000 GLAC \n", "001b4c3e1c848f58 3.030604 GLAC \n", "001b4c3e1c848f58 5.485402 GLAC \n", "001b4c3e1c848f58 3.923203 GLAC \n", "001b4c3e1c848f58 1.430008 GLAC \n", "001b4c3e1c848f58 4.556126 GLAC \n", "\n", " description units \\\n", "survey_id \n", "001b4c3e1c848f58 All other cheese, such as American, cheddar or... unit \n", "001b4c3e1c848f58 Apples, applesauce and pears unit \n", "001b4c3e1c848f58 Avocado and guacamole unit \n", "001b4c3e1c848f58 Bananas unit \n", "001b4c3e1c848f58 Beef, pork, ham and lamb - with fat unit \n", "001b4c3e1c848f58 Beer (all types) unit \n", "001b4c3e1c848f58 Broccoli unit \n", "001b4c3e1c848f58 Burritos, tacos, tostadas and quesadillas unit \n", "001b4c3e1c848f58 Butter (Cereals and Breads) unit \n", "001b4c3e1c848f58 Carrots - cooked unit \n", "001b4c3e1c848f58 Carrots - raw unit \n", "001b4c3e1c848f58 Cauliflower, cabbage and Brussels sprouts unit \n", "001b4c3e1c848f58 Chocolate, candy bars, and toffee unit \n", "001b4c3e1c848f58 Coffee (not lattes or mochas) unit \n", "001b4c3e1c848f58 Coleslaw unit \n", "001b4c3e1c848f58 Cookies and cakes - regular unit \n", "001b4c3e1c848f58 Whole grain breads, including bagels and rolls unit \n", "001b4c3e1c848f58 Eggs unit \n", "001b4c3e1c848f58 Fresh garlic, including in cooking unit \n", "001b4c3e1c848f58 Fresh tomatoes unit \n", "001b4c3e1c848f58 Grapes, fresh unit \n", "001b4c3e1c848f58 Green or string beans unit \n", "001b4c3e1c848f58 Green peppers and green chilies, cooked unit \n", "001b4c3e1c848f58 Green salad (Lettuce or spinach) unit \n", "001b4c3e1c848f58 Latte (whole milk), mocha or hot chocolate unit \n", "001b4c3e1c848f58 Lunch meats such as ham, turkey and lowfat bol... unit \n", "001b4c3e1c848f58 Muffins, scones, croissants and biscuits unit \n", "001b4c3e1c848f58 Oil, canola (Fat used in cooking) unit \n", "001b4c3e1c848f58 Oil, olive (Cereals and Breads) unit \n", "001b4c3e1c848f58 Oil, other such as corn, soybean, safflower or... unit \n", "... ... ... \n", "001b4c3e1c848f58 Red peppers and red chilies, cooked unit \n", "001b4c3e1c848f58 Red peppers and red chilies, raw unit \n", "001b4c3e1c848f58 Red Wine unit \n", "001b4c3e1c848f58 Regular potato, tortilla chips, corn chips and... unit \n", "001b4c3e1c848f58 Spaghetti, lasagna and other pasta with tomato... unit \n", "001b4c3e1c848f58 Stew, pot pie, curries and casseroles with mea... unit \n", "001b4c3e1c848f58 Summer squash and zucchini unit \n", "001b4c3e1c848f58 All tea unit \n", "001b4c3e1c848f58 Vegetable, minestrone and tomato soup unit \n", "001b4c3e1c848f58 Water (tap or bottled) unit \n", "001b4c3e1c848f58 White fish (broiled or baked) such as sole, ha... unit \n", "001b4c3e1c848f58 White or rosé wine unit \n", "001b4c3e1c848f58 Winter squash such as acorn, butternut and pum... unit \n", "001b4c3e1c848f58 Yogurt, all types except frozen unit \n", "001b4c3e1c848f58 Butter (Fat used in cooking) unit \n", "001b4c3e1c848f58 Oil, olive (Fat used in cooking) unit \n", "001b4c3e1c848f58 Oil, olive (Fats used on vegetables) unit \n", "001b4c3e1c848f58 Salt unit \n", "001b4c3e1c848f58 Veggie soy or tofu burgers or ground meat subs... unit \n", "001b4c3e1c848f58 Tofu unit \n", "001b4c3e1c848f58 Tempeh unit \n", "001b4c3e1c848f58 Soy sauce, tamari, teriyaki sauce, Szechwan sa... unit \n", "001b4c3e1c848f58 Corn tortillas unit \n", "001b4c3e1c848f58 White Rice unit \n", "001b4c3e1c848f58 Cooked greens, such as spinach, swiss chard an... unit \n", "001b4c3e1c848f58 Whole grain breads, including bagels and rolls... unit \n", "001b4c3e1c848f58 Cooked whole grain cereals unit \n", "001b4c3e1c848f58 All other beans such as baked beans, lima bean... unit \n", "001b4c3e1c848f58 Soy milk, fortified (Milk in cooked cereal) unit \n", "001b4c3e1c848f58 Rice milk (Milk in cooked cereal) unit \n", "\n", " valueType frequency servingFrequencyText \\\n", "survey_id \n", "001b4c3e1c848f58 Index 104.0 2 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 28.0 2-3 per month \n", "001b4c3e1c848f58 Index 52.0 1 day per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 104.0 2 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 912.0 2-3 per day \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 104.0 2 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 286.0 5-6 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 28.0 2-3 per month \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 365.0 1 per day \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 365.0 1 per day \n", "... ... ... ... \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 104.0 2 days per week \n", "001b4c3e1c848f58 Index 28.0 2-3 per month \n", "001b4c3e1c848f58 Index 12.0 1 per month \n", "001b4c3e1c848f58 Index 12.0 1 per month \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 912.0 2-3 per day \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 1642.0 4-5 per day \n", "001b4c3e1c848f58 Index 12.0 1 per month \n", "001b4c3e1c848f58 Index 24.0 2-3 days per month \n", "001b4c3e1c848f58 Index 28.0 2-3 per month \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 365.0 1 per day \n", "001b4c3e1c848f58 Index 365.0 1 per day \n", "001b4c3e1c848f58 Index 730.0 2 per day \n", "001b4c3e1c848f58 Index 1095.0 3 per day \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 104.0 2 per week \n", "001b4c3e1c848f58 Index 24.0 1-3 per month \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 52.0 1 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 286.0 5-6 per week \n", "001b4c3e1c848f58 Index 182.0 3-4 per week \n", "001b4c3e1c848f58 Index 286.0 5-6 per week \n", "001b4c3e1c848f58 Index 286.0 5-6 per week \n", "\n", " servingSizeText foodCode \n", "survey_id \n", "001b4c3e1c848f58 1 slice (1 oz), 1/4 cup shredded, 2 tablespoon... 70005 \n", "001b4c3e1c848f58 1/2 apple or pear, 1/4 cup 20001 \n", "001b4c3e1c848f58 1/2 avocado, 1/2 cup 30006 \n", "001b4c3e1c848f58 1/2 banana 20002 \n", "001b4c3e1c848f58 3 slices, 2 chops, small steak (4-5 oz) 40002 \n", "001b4c3e1c848f58 16 oz (1 pint) 90002 \n", "001b4c3e1c848f58 1/4 cup 30012 \n", "001b4c3e1c848f58 1 taco or tostada, 1 small burrito, 1 quesadilla 40202 \n", "001b4c3e1c848f58 4 teaspoons (4 pats) 10004 \n", "001b4c3e1c848f58 1/2 cup 30041 \n", "001b4c3e1c848f58 1/2 carrot, 4 baby carrots, 1/4 cup 30040 \n", "001b4c3e1c848f58 1/4 cup 30018 \n", "001b4c3e1c848f58 1 regular bar, 8-10 bite sized-pieces (1.5 oz) 80008 \n", "001b4c3e1c848f58 1 small cup (6 oz) 90008 \n", "001b4c3e1c848f58 3/4 cup 30017 \n", "001b4c3e1c848f58 2 medium cookies, 1 large cookie, 1 small piec... 80005 \n", "001b4c3e1c848f58 2 slices bread, 1 medium roll, 1 English muffi... 120413 \n", "001b4c3e1c848f58 2 eggs 50004 \n", "001b4c3e1c848f58 1 clove 30043 \n", "001b4c3e1c848f58 4 slices, 1 medium tomato 30008 \n", "001b4c3e1c848f58 8-10 grapes, 1/2 cup 20024 \n", "001b4c3e1c848f58 1/4 cup 30001 \n", "001b4c3e1c848f58 1/8 cup 30054 \n", "001b4c3e1c848f58 1 cup (medium bowl) 30021 \n", "001b4c3e1c848f58 1 regular cup (8-10 oz) 90007 \n", "001b4c3e1c848f58 2 slices (2 oz) 40028 \n", "001b4c3e1c848f58 Small muffin, small croissant, small biscuit 60028 \n", "001b4c3e1c848f58 2 teaspoons 10011 \n", "001b4c3e1c848f58 4 teaspoons (4 pats) 10010 \n", "001b4c3e1c848f58 2 teaspoons 10007 \n", "... ... ... \n", "001b4c3e1c848f58 1/8 cup 30057 \n", "001b4c3e1c848f58 1/4 pepper, 4-6 strips, 1/4 cup 30056 \n", "001b4c3e1c848f58 4 oz (about 1/8th bottle) 90018 \n", "001b4c3e1c848f58 2 handfuls, 1 small bag (1 oz) 60031 \n", "001b4c3e1c848f58 1 cup (medium bowl) 40017 \n", "001b4c3e1c848f58 1 cup (medium bowl) 40004 \n", "001b4c3e1c848f58 1/4 cup 30046 \n", "001b4c3e1c848f58 1 large glass (12 oz) 90009 \n", "001b4c3e1c848f58 1 1/2 cups (large bowl) 40033 \n", "001b4c3e1c848f58 1 regular glass (8 oz) 90017 \n", "001b4c3e1c848f58 1 medium piece (4-5 oz) 40073 \n", "001b4c3e1c848f58 6 oz (about 1/6th bottle) 90019 \n", "001b4c3e1c848f58 3/4 cup 30047 \n", "001b4c3e1c848f58 3/4 (6 oz) (regular container) 70017 \n", "001b4c3e1c848f58 2 teaspoons 10004 \n", "001b4c3e1c848f58 2 teaspoons 10010 \n", "001b4c3e1c848f58 2 teaspoons 10010 \n", "001b4c3e1c848f58 21-40 shakes (1/4 teaspoon) 10100 \n", "001b4c3e1c848f58 1 burger or 3 oz 120191 \n", "001b4c3e1c848f58 4-5 pieces (4 oz) 120206 \n", "001b4c3e1c848f58 3 pieces (3 oz) 120207 \n", "001b4c3e1c848f58 2 teaspoons 120198 \n", "001b4c3e1c848f58 2 (6\" diameter) 120123 \n", "001b4c3e1c848f58 1/2 cup 120414 \n", "001b4c3e1c848f58 3/4 cup 120438 \n", "001b4c3e1c848f58 2 slices bread, 1 medium roll, 1 English muffi... 120412 \n", "001b4c3e1c848f58 1/2 cup (small bowl) 120410 \n", "001b4c3e1c848f58 3/4 cup 30044 \n", "001b4c3e1c848f58 2/3 cup 120560 \n", "001b4c3e1c848f58 2/3 cup 70018 \n", "\n", "[64 rows x 9 columns]" ] }, "execution_count": 378, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test[test.code == 'GLAC']" ] }, { "cell_type": "code", "execution_count": 373, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
amount
code
GLAC62.397260
GLTC69.876978
LineGi336.912520
acesupot0.000000
addsugar25.437240
adsugtot20.476220
alanine2.147886
alcohol17.794446
alphacar2840.690441
alphtoce12.646316
alphtoco11.345113
\n", "
" ], "text/plain": [ " amount\n", "code \n", "GLAC 62.397260\n", "GLTC 69.876978\n", "LineGi 336.912520\n", "acesupot 0.000000\n", "addsugar 25.437240\n", "adsugtot 20.476220\n", "alanine 2.147886\n", "alcohol 17.794446\n", "alphacar 2840.690441\n", "alphtoce 12.646316\n", "alphtoco 11.345113" ] }, "execution_count": 373, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.groupby('code').agg('sum').head(11)" ] }, { "cell_type": "code", "execution_count": 375, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "survey_id 001b4c3e1c848f58\n", "3_Methylhistidine_in_mg 2.60684169804397\n", "Acesulfame_Potassium_in_mg 0.0\n", "Added_Sugars__by_Available_Carbohydrate__in_g 25.4372404710912\n", "Added_Sugars__by_Total_Sugars__in_g 20.4762200586827\n", "Alanine_in_g 2.14788575606249\n", "Alcohol_in_g 17.7944461190833\n", "Alpha_Carotene__provitamin_A_carotenoid__in_mcg 2840.69044111298\n", "Alpha_Tocopherol_in_mg 11.3451129336108\n", "Animal_Protein_in_g 16.6596165030072\n", "Arginine_in_g 2.87293720209445\n", "Name: 10317.000030578, dtype: object" ] }, "execution_count": 375, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_micromacro_pivot[vios_coded_micromacro_pivot.survey_id == test_survey].iloc[0].head(11)" ] }, { "cell_type": "code", "execution_count": 376, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "survey_id 001b4c3e1c848f58\n", "Percent_of_calories_from_Added_Sugar 0.864436614273603\n", "Percent_of_calories_from_Alcohol 7.643624875670007\n", "Percent_of_calories_from_Carbohydrate 38.41905488083203\n", "Percent_of_calories_from_Fat 41.82753132412285\n", "Percent_of_calories_from_Monounsaturated_Fat 21.020284676343078\n", "Percent_of_calories_from_Polyunsaturated_Fat 8.6645383097221\n", "Percent_of_calories_from_Protein 12.109788919375113\n", "Percent_of_calories_from_Saturated_Fat 11.298838478082244\n", "Name: 10317.000030578, dtype: object" ] }, "execution_count": 376, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vios_coded_percents_pivot[vios_coded_percents_pivot.survey_id == test_survey].iloc[0]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }