pm21-dragon/exercises/release/exercise-07/2__pandas_intro.ipynb

978 lines
164 KiB
Plaintext
Raw Normal View History

2024-11-25 02:20:05 -05:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "d69bc868f6d25bb4629e34ee0d04c82a",
"grade": false,
"grade_id": "cell-4591728b0e94385d",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# You must run this cell, but you can ignore its contents.\n",
"\n",
"import hashlib\n",
"\n",
"def ads_hash(ty):\n",
" \"\"\"Return a unique string for input\"\"\"\n",
" ty_str = str(ty).encode()\n",
" m = hashlib.sha256()\n",
" m.update(ty_str)\n",
" return m.hexdigest()[:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "44d7293dd0e7ee40d502467e2d5c86ee",
"grade": false,
"grade_id": "cell-b2a0e9ec110c03e1",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "34c6b25c92c765729b2b508ec57e5f11",
"grade": false,
"grade_id": "cell-bfeafe88c3a0be38",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"Let's get started by making a sample dataframe with fake data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "1b234584a4945e17b8ec688bee55a40d",
"grade": false,
"grade_id": "cell-710e679d69e83ae2",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"sample_df = pd.DataFrame({'number':[1,2,3,234,2,3,2,2,1,2], 'color':['blue','blue','red','red','red','blue','blue','red','green','yellow']})\n",
"display(sample_df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "de6743867d921f2d09696cece3a92600",
"grade": false,
"grade_id": "cell-0320bf7579e7a5e0",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q1 Create a Series named `condition` which is true for all rows where the color is red"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "4aaaebea3a8f93d9222530e203fb8e16",
"grade": false,
"grade_id": "cell-c57131439b7d2882",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "56be9424b4c5783e4e9e0968289fa17c",
"grade": true,
"grade_id": "cell-c53eedea62c243e1",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(condition)=='28612bf01a'"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "283eb9c8950a55359805548926c43d23",
"grade": false,
"grade_id": "cell-ac32ae211af0e86d",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q2 Create a new `DataFrame` named `red_sample_df` which contains only the rows with red color from `sample_df`.\n",
"\n",
"Hint: use your `condition` Series from above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "cfb3e93d52c7ea11495e8777d124d709",
"grade": false,
"grade_id": "cell-c4be64a1d42e707d",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "c2bbd23712904a512a4f3de6f4326d21",
"grade": true,
"grade_id": "cell-3c7eee81fa01c6e7",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(red_sample_df)=='354bd7ec89'\n",
"assert ads_hash(sample_df)=='21692a4d62'"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "92155c062254c66877cb4d54013734a6",
"grade": false,
"grade_id": "cell-98dc0525f2068315",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"Now let's use the DataFrame `.groupby()` method to find the mean value of `number` for each `color`. (Hint: this will be useful later in this exercise.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "1f65b8c98841d491f26644c207bf28a0",
"grade": false,
"grade_id": "cell-43c0e1a1816c47a2",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"for color, gdf in sample_df.groupby('color'):\n",
" mean_number = gdf[\"number\"].mean()\n",
" print(f\"{color} {mean_number}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "5ce137b45c6e116736efc4b5ae7436e0",
"grade": false,
"grade_id": "cell-9f757e6f5f690e81",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"# Using Pandas to work with real data\n",
"\n",
"<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABH0AAAKYCAYAAADuT4S/AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAAhdEVYdENyZWF0aW9uIFRpbWUAMjAyMDowNzoxNyAwNjoyNzo1MMKOQsIAAP94SURBVHhe7N0HYNRk/wfwb3vdLR2UtpRV9irDAaL8VWQ4UBDECSIucKHoq+IeKG7U1wE4UFEcqCDKEFGWOPBFUARk700p0EHpHv/8csld7rjZ3nUc3w/G5nJJLnny5Lk8v3vyJKhCASIiIiIiIiIiCijB2l8iIiIiIiIiIgogDPoQEREREREREQUgBn2IiIiIiIiIiAIQgz5ERERERERERAGIQR8iIiIiIiIiogDEoA8RERERERERUQBi0IeIiIiIiIiIKAAx6ENEREREREREFIAY9CEiIiIiIiIiCkBBFQptvNrJRwcFBaGk7Dh+3nsHMrN2IRihkA2KT4hBj+QJqB/VAeXlZQgONpkXIiIiIiIiIiIit6ol6FNUWoTy8nLl05QXyqcFBwfDFGSCKdikBn2KSrPw68E7cChzl/J+sDItGLHxEeiR/DqSY7oCZRX4ffvv+HXrLwhR/pWrYSEgOjoa15x5LZLqJVkCSEREREREREREVA1Bn8PHD+ORGQ8h63g2QoJNCA4KRmFZIS7p1B939r5LvcGsuDQHv6hBnx3K+yHKUkGIjY/EWUmvmoM+JRWY9PMkfL3uS4QhHBXKv9LSUtRPrI9nL34OnZp0QllFqbIq7W41LbgkwSMiIiIiIiIiolORT6MiFRXllqG8vFSdVlJagsy8TBzOzVADQBnK3wPZB3As7yhQrs6iBnFEkGxORZD6Vx03iAyLBIqVDS5Vpit/Q8pDUF5YjhCTBIkAkxosMrcS0lsLERERERERERGdqnwaGZFAS0WFrFKCLuZgjNxyZVL+hQaHIkSZpv4NVv6GhKrvC2lsFBpegTIUIDi0VP1rCitFeUWZNofc4VWmvC63GdS4jsSLlPe2Z8/Evxkf4N+DU7HhyBQcyPkfSovVRYmIiIiIiIiITjk+C/qUlwFbMr/F3/vfUIa3senIp0CFNOUJgslkQlBwkDrIrVfSlkf+yX8ixBSOiLJWSI4+EwlhXZAUfQbiTJ0RERJnnkECR0EmNTgkfQPpg6hQ+/EpRWbQbKzNnIhN2R9gU85k7M35GSUM+hARERERERHRKcp3QZ9S4FDhYmzNnYqtOZ9gb8F3QJDc6lWBClM5SiukBY/5X3BYMAqKC4AS87IhwVHonvw8Lmr9Ifq1fh8Xtn4PPZLeQFxEK7VFj7IIcotyER0VjejIaMTE1ENEeATq1YtVg0ESPSo6oexKSRTKisJRUhCG0OBoiRXVWpIuZWVKavhx0INj6jFQhtrA0Xb6c9D33x8cfZ4ngx6w9LWq5ClfppGj9Xs71BRH2+LLwXg++itfEhERERER6XzWkXNJEfDLnvtw8Pj/EBwUhrioxri03TRk5eXhlQUvIis3S31al3ycPL0rNDQMl3a9DBe1uwgIUSpbFWVawx89UmOuFJmCQ/DzxqXYcGgDLu86SO3/R+aQjQ4NDUGDqCQguBi/7r8dBw5vU+aPQIXpBNomDEd6whhERKsrI40ecJDb7k61p51JfpLhVNx3qr1O5XOSiIiIiIj8y6dBn+UZo7E3+2cEIQzx0c1wcYsvERIcKbVt86DWZ8yVmnJUYOrvH0GeuHXz/92iTz7J6t1/Y87qOXj0sscQFhqmTbVVXJ6HNfljsGvfeoSHRiMoNB/JuAJd6j+MyBhtplrm6NGj+N///oeSklIlaXzX8kOSOSwsDLGx9RATE4O4uHgkJyehXr162hxWcuilwim331WHkpISLFu2DLm5uUrlVu+Qyff0NIiLi0NCQjxSU1ORmJiovWulB4Aq48SJE1i6dKl6/Kxc7498Vnl5BZKSknDeeedW6fON5BhKIPXw4cNYseJP5bUnrYmC1M+XY19cXIxmzZqiR48e2nuVJ+tcsmQJsrOzlXWHKK89z9uyD8XFJUp+TUavXudrU/1PPw5Hjx5T0u9/6jb445yUczA+Pk49L2UfHZ2TQj+eREREREREVVWloI+0urEsXhGMnce+R27RXgQHmRARGoc2DYYo00NQViYVXvNs8lcWCZF+npXp89bPw+4jOzGgy+XqutQnfynrDQsJxdETR/HtylkY2/9hxLiI3pRXlGL/iUU4rsyvPvI9uARxYR3UPoKM8Qy9cleT5BYPqWhLwOfWW0eqARDfB12ClEqjuS+l8PBwtcJZv359dOnSRRk646yzzkL79u1tKpayXfLaH+mjp/vx48dx+eWDsHHjRkRGRqrT/Un2X4aIiAg10NK9e3dcdNGFOP/885X8Z+5ovPIV7AoMHz4CCxcuRHR0tJp+nqSdBL6kwv/ZZ58hPb2jTyr4+jpGjhyJH35YoB5vmebJ9pSWliI2NhaTJk1C794XVPoc0fP1unXrcP31w3Hs2DFLGnujqKgIrVq1wpw5s9U8Wx3nrPWcXIFRo0YhKyurUtvumvmclPXKZ0VGRiAxsYF6Tp5++mk444wz0KFDB2VfzfMKfbuIiIiIiIgqy2ctfYyUeptSWVMq3Upddmf2N8gsWonykhClmlyuvhEbF4e2cbcgKjRZne+Fuc9h1eE/EVQcjIogpZKn/CsuLUaTlKZ44bIXUD86UZ1PfnyXBiJ6HVCp16qc1pmLlYW0mWX5IMcNhaqVXpFbtWqVWjkuLCxUK+i+JodVH6Qlh/65MiQkJCAlJQX9+vXDkCFXID09XVvKPxVN2QY96HPJJf2xY8cOhIaGqtP9yX7/JTAiAZpmzZrh5ptvxtCh16nBJ31eT4ML+rzSMuSiiy7CgQMH1ECLfIardchyUunPyzuhpH1ffPnldEtwxtPPtqcHfKTV0U033WzZZ3f0bZH8d/vtt+GZZ8Yp0+R0qdx26MaNewaTJ09GVFSUGlDyZn36NpWWluHFF5/HDTfcYNk/fzKekzfcMEJtxeVJGnpLPzYySPBPBvlc2WcJvDVq1Aj9+/fHoEGD0KFDe22pqgQmiYiIiIjoVFepoI8sIpU5CcxsO7wVpUqlSZSXl6r97LROaoPw0EhIX83/O/AwMssWobQoVA3YlCgVwaSk+jgz7r9IjE5X1/X2T29hzvrZQDGUdZWqff+cKDqBHq174M2hb6HCBOQU7MDGvNdx9Ij8Cm9SA0Ah4eVIjbgQ7RNvgilU3walgqR80IrtK7BF2TaJMwkJJnVM7YjuLc5S9to8rSboFcyVK1di6NBhfgv6yPHRj5Ne8dYrjlIZLygoUF9LCxhp/TNy5Ci1FYxOX9YX9HVJ0Oeiiy7G9u3b/R70sd92eS37K2mdn5+vHgNp7fTEE0/g4osvUufxZp/NFfEgfPrp53jooYe0YEWp+hnO9kvWLcvJZ8vw1ltvqhV887q8r9Qbt/fiiy/BX3/9pQax3AVbZDn5PLmNqX37tliwYIEaDPNm/4305SS41rt3H2zZskUNgjlLB1ckXaT12wUXXIDZs7/TpvqXMegzbNj1fgv6SBrpaaWns55fZBv0c1JaOHXrdiZGjboNffr0VucT+rJERERERESeqtTPxxXKP3E07yheWfYyHpjxHzw2+xE88t0jeO7H53H0RJb6vgRXTMFKJbQ4FOWl4erfoHLltQSA1KduaRVhB/1nyPSSshJ1C6XlT2FpNg4fX4usgq04nLsBR/I2Y9/Rv5GVvx2lymxBMFkHZd2L1i/G+8vfw3vL3sWUX97Hu7+/g2Wblik1PO0DApxeaZWAglQoZZBggB4QkEq+DDk5OVi4cJF6q9mgQYPVW1yEelyUZesq2X8ZZB/0QfZdpkkrFLnla8OGDbj99tsxbtw4dRlvKtQyryTxDTcMVwMUUmGXQJarNJP39M+QwMLrr/9XDfq5ChS5oi8zZcoHWL16tSXg42x9+jTZBhnCwkLx6KOPqPnAuG3e0te7dOnP2LNnj5q2ldkfIflUtmfz5s3YtGmTOs1Vm
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "24b73633a26b6493b3039e21b5b2ded6",
"grade": false,
"grade_id": "cell-927793ccea2a12d7",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Get the CSV file and upload it to Jupyter\n",
"\n",
"* Go to https://datadryad.org/stash/dataset/doi:10.5061/dryad.vb06d3k and download the data by clicking \"Download Dataset\".\n",
" * Check that now have a file called `doi_10.5061_dryad.vb06d3k__v1.zip` about 99KB in size in your Downloads.\n",
" * Unzip this file, which should create:\n",
" - `spectra.csv`\n",
" - `data_onevalueperbee.csv`\n",
" - `data_visitationsequence.csv`\n",
" * We are going focus on the file `data_onevalueperbee.csv`. Upload this file to your Jupyter server using the `Upload` button in the file view. Upload it into the same directory as this `.ipynb` file. Alternatively, if you are running this on your own computer, you can copy this `.csv` file into the directory with your `.ipynb` file.\n",
"\n",
"## Look at the file\n",
"\n",
"As we have covered, CSV stands for \"Comma separated values\" and is one of the most widely used file formats for scientific data. CSV files are typically like tables, potentially with column names in the first line. The lines of the file are then rows of the table. Spreadsheet programs can open most CSV files, sometimes with a few settings being required to deal with various CSV dialects.\n",
"\n",
"Let's take a look at this CSV file. This is mostly a repitition of what we recently looked at. We open the CSV file read the first few lines."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "811086a0abe51449dbd8846574c1567e",
"grade": false,
"grade_id": "cell-0964768c19f2b15e",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# Open the file in Python. The variable `fd` will be assigned the open file object.\n",
"fd = open('data_onevalueperbee.csv', mode=\"rb\")\n",
"\n",
"# Iterate over the lines in the file, also creating an integer called `line_number` via the\n",
"# use of the `enumerate()` function.\n",
"for (line_number, line) in enumerate(fd.readlines()):\n",
" # Print each line.\n",
" print(line)\n",
" if line_number >= 3:\n",
" # Stop after a few lines\n",
" break\n",
"# Close the open file object.\n",
"fd.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "4a69997e3bd85d495d63ad8c1c03209e",
"grade": false,
"grade_id": "cell-5dd558e28f33dab8",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"So, note that instead of a comma (`,`) separating the columns, we have a semicolon (`;`) instead. So we will need to use this when calling the Pandas `read_csv()` function. Thus, we must use the `sep=';'` keywoard argument. Also, this CSV file has some slightly unusual characters in it. From trial and error, I have learned that it must be opened with a `encoding='latin_1'` keyword argument to `read_csv()`.\n",
"\n",
"## Q3. Read the file `data_onevalueperbee.csv`? Into a dataframe named `df`.\n",
"\n",
"Hint: use the `read_csv` function not only with the filename as the required first positional argument, but also with the keyword arguments described above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "cfa8fc26dccf202e0552fbe7f20d39a0",
"grade": false,
"grade_id": "cell-107a3973d184c577",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "e29a8d768e35bab97a7e161ae92b4b10",
"grade": true,
"grade_id": "cell-66fcba98183cb481",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(df)=='15994f5b0e'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "d0ba76462e8deb861510008fbba83e87",
"grade": false,
"grade_id": "cell-bdfe656f6d2cf933",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# Let's look at the first few lines of the file with the pandas DataFrame `.head()` method:\n",
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "48a608e14353fc11ffde92b8aaafb774",
"grade": false,
"grade_id": "cell-85664fce05868fd1",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q4. How many rows of data are in the dataframe you read from the file `data_onevalueperbee.csv`? Put the answer in a variable `num_rows`.\n",
"\n",
"Hint: you can use `len(df)` to calculate the number of rows in the DataFrame `df`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "c5984a1e2e5c94c6a55898c84172585e",
"grade": false,
"grade_id": "cell-e10d9bb147d32ad5",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "ef89bf89dc1360905dfc2ec1672e8675",
"grade": true,
"grade_id": "cell-227f7cff2325cb9a",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(num_rows)=='ff2ccb6ba4'"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "041b778a6f2e9774e89077f2e84e6c13",
"grade": false,
"grade_id": "cell-6b133b10967bef7a",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q5. What are the unique values of the `nest` column?\n",
"\n",
"Put your answer in the variable `unique_nests`. Hint: use the `.unique()` method on the Series for the `nest` column."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "4351d536958f18d71dac51f8d4fcdb94",
"grade": false,
"grade_id": "cell-e0b36658308de988",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "cb726f27d5302c71931b5c8846f5894d",
"grade": true,
"grade_id": "cell-208034c33360f85d",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(unique_nests)=='59e69ce283'"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "65c708010009ca83be24a16ebf3c9414",
"grade": false,
"grade_id": "cell-7802496c83a4d50a",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q6. What are the unique values of the `treatment` column?\n",
"\n",
"Put your answer in the variable `unique_treatments`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "915f9d18fbeff0d0950968a352969acc",
"grade": false,
"grade_id": "cell-7a258af7f499be42",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "50cd6ceac936c5a36332c0c908630980",
"grade": true,
"grade_id": "cell-96c51bcd8e5a3ac3",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(unique_treatments) == '670c49c25a'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's check which nests are in each treatment:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "db178e118177b7b60472616df9ce8662",
"grade": false,
"grade_id": "cell-2c145981d446e40d",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"for treatment, gdf in df.groupby('treatment'):\n",
" print(treatment)\n",
" print(gdf['nest'].unique())\n",
" print()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "16b626ddde51ff74529cc1f99b355951",
"grade": false,
"grade_id": "cell-e5dbccbf3a349a88",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q7. Now make a `seaborn` `boxplot` of the time to start foraging (`tstartforaging` in the CSV file) on the `y` axis, `nest` number on the `x` axis, and with the color (`hue`) being the `treatment`. Save the plot to a file named `Figure2a.png`.\n",
"\n",
"Your plot should look like: <img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde1xVdb7/8fdyI2xQ2YoGSKKZ2sXULDUDZ9K0vGV2O+UMilamZqXiJfs1Zyor0zRDKU+OdSobk5N10qZmwrLGLIdMNDmp0W0ikSOIGoIXLgrr94fjPm5BFPZi7w3r9Xw8eARrfff6fvYS22+/a63v1zBN0xQAAABso4m/CwAAAIBvEQABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALCZIH8X0JBVVlZq7969atGihQzD8Hc5AADgPJimqcOHDysmJkZNmthzLIwA6IW9e/cqNjbW32UAAIA62LNnj9q1a+fvMvyCAOiFFi1aSDr5CxQeHu7nagAAwPkoLi5WbGys+3PcjgiAXjh12Tc8PJwACABAA2Pn27fseeEbAADAxgiAAAAANkMABAAAsBnuAQQANGimaerEiROqqKjwdykIEA6HQ0FBQba+x+9cCIAAgAarvLxceXl5OnbsmL9LQYAJCwtT27ZtFRwc7O9SAhIBEADQIFVWVio7O1sOh0MxMTEKDg5mxAcyTVPl5eXav3+/srOz1aVLF9tO9lwTAiAAoEEqLy9XZWWlYmNjFRYW5u9yEEBCQ0PVtGlT7d69W+Xl5XI6nf4uKeAQiQEADRqjO6gOvxc14+wAaHDS09M1atQopaen+7sUAGiQCIAAGpTS0lIlJydr3759Sk5OVmlpqb9LAoAGhwAIoEFZtWqVDh48KEk6ePCgUlNT/VwRIA0YMEBJSUk+6WvFihVq2bKlT/o6H7/88osMw1BmZqa/S0EtEAABNBi5ublKTU2VaZqSTj7tl5qaqtzcXD9XBtTs1FyFQKAgAAJoEEzTVEpKylm3nwqFgK/dfffd2rhxo1JSUmQYhgzD0IoVK2QYhj766CP17t1bISEh+uKLLyRJH3zwgXr16iWn06mLL75YTz75pEc4TE5OVvfu3dWsWTPFxsbqgQce0JEjRyRJn332me655x4VFRW5+5ozZ44k6aKLLtLcuXM1duxYNW/eXB06dNBf/vIX7d+/X7fccouaN2+u7t27a+vWrR71p6en67rrrlNoaKhiY2M1depUHT161L3/oosu0rx583TvvfeqRYsWat++vV5++WX3/o4dO0qSrrrqKhmGoQEDBtTHaYbFCIAAGoScnBxlZGRUWe2hoqJCGRkZysnJ8VNlsLuUlBTFxcVpwoQJysvLU15enmJjYyVJs2fP1vz585WVlaUePXroo48+0pgxYzR16lR9++23Wr58uVasWKFnnnnGfbwmTZrohRde0M6dO/XGG2/o73//u2bPni1Jio+P15IlSxQeHu7ua9asWe7XLl68WP369dP27dt10003KTExUWPHjtWYMWP09ddfq3Pnzho7dqz7H0w7duzQkCFDdPvtt+ubb77R6tWrtWnTJj300EMe7/H5559X7969tX37dj3wwAOaPHmyvvvuO0nSli1bJEmffPKJ8vLytGbNmvo72bCOiTorKioyJZlFRUX+LgVo9CorK81Zs2aZAwcONPv37+/+GjhwoPnwww+blZWV/i4RPlZSUmJ+++23ZklJib9LMfv3729OmzbN/fOGDRtMSeZ7773n0e63v/2tOW/ePI9tK1euNNu2bXvWY7/99ttm69at3T+//vrrpsvlqtKuQ4cO5pgxY9w/5+XlmZLMxx57zL3tyy+/NCWZeXl5pmmaZmJiojlx4kSP43zxxRdmkyZN3Of1zONWVlaakZGR5rJly0zTNM3s7GxTkrl9+/azvgd/qOn3g89v02QiaAANgmEYmjZtmsaNG1ftdlaAQCDq3bu3x8/btm1TRkaGx4hfRUWFSktLdezYMYWFhWnDhg2aN2+evv32WxUXF+vEiRMqLS3V0aNH1axZsxr769Gjh/v7qKgoSVL37t2rbCsoKFB0dLS2bdumn376SatWrXK3MU3TvcrK5ZdfXuW4hmEoOjpaBQUFtT0dCCAEQAANRrt27ZSQkKA333xTpmnKMAwlJCTowgsv9HdpQLXODGyVlZV68skndfvtt1dp63Q6tXv3bg0fPlz333+/nn76aUVERGjTpk0aP368jh8/fs7+mjZt6v7+1D+KqttWWVnp/u+kSZM0derUKsdq3759tcc9dZxTx0DDRAAE0KCMHj1aaWlpOnDggNq0aaOEhAR/lwQoODi4yv2p1bn66qv1/fffq3PnztXu37p1q06cOKHnn3/evZLF22+/Xae+zsfVV1+tXbt2nbWe8xEcHCxJltUE3+AhEAANitPp1IwZMxQVFaXp06ezxicCwkUXXaSvvvpKv/zyiw4cOHDW0bHHH39cf/7znzVnzhzt2rVLWVlZWr16tf74xz9Kkjp16qQTJ07oxRdf1M8//6yVK1fqT3/6U5W+jhw5ok8//VQHDhzQsWPH6lz3I488oi+//FIPPvigMjMz9eOPP+r999/XlClTzvsYkZGRCg0N1bp167Rv3z4VFRXVuR74DgEQQIMTHx+v1atXKz4+3t+lAJKkWbNmyeFwqGvXrrrgggvO+lT6kCFD9Ne//lXr169Xnz59dO211yo5OVkdOnSQJPXs2VPJyclasGCBunXrplWrVmn+/Pkex4iPj9f999+vUaNG6YILLtDChQvrXHePHj20ceNG/fjjj/rtb3+rq666So899pjatm173scICgrSCy+8oOXLlysmJka33HJLneuB7ximyeRZdVVcXCyXy6WioiKFh4f7uxwAsJXS0lJlZ2erY8eOjASjipp+P/j8ZgQQAADAdgiAAAAANkMABAAAsBkCIAAAgM0QAAEAAGyGAAgAAGAzBEAAAACbIQACAADYDAEQAADAZoL8XQAAAFaqqKiQLxe5MgxDDofDZ/352i+//KKOHTtq+/bt6tmzp7/LgUUIgACARqOiokK3/9udKir81Wd9ulpFaM1/vxNQIfCiiy5SUlKSkpKS/F0KAhQBEADQaJimqaLCX3X46rGS4YO7nMxK6es/+3TE0SoVFRUyDENNmnA3mB3xpw4AaHyMJlITH3zVMWRWVlZqwYIF6ty5s0JCQtS+fXs988wzkqQdO3Zo4MCBCg0NVevWrTVx4kQdOXLE/dq7775bt956qxYtWqS2bduqdevWevDBB3X8+HFJ0oABA7R7925Nnz5dhmHIMAxJ0ooVK9SyZUv99a9/VdeuXRUSEqLdu3ersrJSTz31lNq1a6eQkBD17NlT69at8/IPAIGOAAgAgI89+uijWrBggR577DF9++23Sk1NVVRUlI4dO6ahQ4eqVatWysjI0DvvvKNPPvlEDz30kMfrN2zYoH/+85/asGGD3njjDa1YsUIrVqyQJK1Zs0bt2rXTU089pby8POXl5blfd+zYMc2fP1//+Z//qV27dikyMlIpKSl6/vnntWjRIn3zzTcaMmSIRo4cqR9//
"\n",
"Check the left panel of [Figure 2a](https://royalsocietypublishing.org/doi/10.1098/rspb.2018.0506#RSPB20180506F2) from the Lämsä et al. *Proc. Roy Soc B.* 2018 paper. Do you see any similarities or differences with your plot?\n",
"\n",
"Hint: import seaborn and call its `boxplot()` function with `data`, `x`, `y`, `hue` keyword arguments with values `df`, `nest`, `tstartforaging`, `treatment`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "4e0bfb3b3197f59b4b378cc42c9fb40e",
"grade": false,
"grade_id": "cell-df2e372b53d245fd",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# Run this cell to import seaborn\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "750a4c38a2b82e4ed8b56e0d18a09153",
"grade": true,
"grade_id": "cell-208bdcdc93f0ff65",
"locked": false,
"points": 1,
"schema_version": 3,
"solution": true,
"task": false
},
"tags": []
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "661c5a59e84cfbc7be88b5d72b6368fc",
"grade": false,
"grade_id": "cell-c9c67275053aadd0",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q8. Now make a plot using a single line using seaborn's `boxplot` function which shows the duration of the foraging period on the y axis.\n",
"\n",
"This should be fairly similar to the second panel in [Figure 2a](https://royalsocietypublishing.org/doi/10.1098/rspb.2018.0506#RSPB20180506F2) from the Lämsä et al. Proc. Roy Soc B. 2018 paper.\n",
"\n",
"Hint: the duration of the foraging period is in the `'maxminusstarttime'` column."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "d324ea23b03ae829d5915f84777fd060",
"grade": true,
"grade_id": "cell-d6bb242d388543aa",
"locked": false,
"points": 1,
"schema_version": 3,
"solution": true,
"task": false
},
"tags": []
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "709c8344d5dc3d89e8538ddf1fe98d4d",
"grade": false,
"grade_id": "cell-b926439e7eb1c2b5",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q9. Make a dictionary called `mean_duration_of_foraging_by_treatment`. Fill this with items whose keys are treatment strings (`'control'` and `'imidacloprid'`) and whose values are the mean of all `'maxminusstarttime'` values for rows in which the `treatment` variable is the same as the key. \n",
"\n",
"Hint: use `groupby` to group on the `treatment` column and use the `.mean()` method on the `maxminusstarttime` Series within each group data frame. Store each mean as the value in your dictionary for the key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "ae28163eb76623d86f1a559b7034b23c",
"grade": false,
"grade_id": "cell-cdfad08c7777905d",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "e9ed0cb64154eca5ef0047ce15d6e868",
"grade": true,
"grade_id": "cell-ad0c60eabc04c1b7",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash([(k,int(round(v))) for (k,v) in mean_duration_of_foraging_by_treatment.items()]) == '14fe49a61a'"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "markdown",
"checksum": "d69b06d2c27073b10f3ef7f942019177",
"grade": false,
"grade_id": "cell-6718f15155ae17c2",
"locked": true,
"schema_version": 3,
"solution": false,
"task": false
}
},
"source": [
"## Q10. Make a dictionary `num_unique_bees_per_nest` which has key-value pairs of nest number and the number of unique bees from that nest.\n",
"\n",
"Hint use `.groupby` on the `'nest'` column."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "36a7d40ffd9de04287936c994cf4c881",
"grade": false,
"grade_id": "cell-0df074ae6ba331e7",
"locked": false,
"schema_version": 3,
"solution": true,
"task": false
}
},
"outputs": [],
"source": [
"# YOUR CODE HERE\n",
"raise NotImplementedError()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"deletable": false,
"editable": false,
"nbgrader": {
"cell_type": "code",
"checksum": "7e95444864e0b05348226653decfba00",
"grade": true,
"grade_id": "cell-9fe3a1593835ed57",
"locked": true,
"points": 1,
"schema_version": 3,
"solution": false,
"task": false
}
},
"outputs": [],
"source": [
"# If this runs without error, it means the answer in your previous cell was correct.\n",
"assert ads_hash(num_unique_bees_per_nest)=='ddc47911c0'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}