1270 lines
222 KiB
Plaintext
1270 lines
222 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-4591728b0e94385d",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# You must run this cell, but you can ignore its contents.\n",
|
|||
|
"\n",
|
|||
|
"import hashlib\n",
|
|||
|
"\n",
|
|||
|
"def ads_hash(ty):\n",
|
|||
|
" \"\"\"Return a unique string for input\"\"\"\n",
|
|||
|
" ty_str = str(ty).encode()\n",
|
|||
|
" m = hashlib.sha256()\n",
|
|||
|
" m.update(ty_str)\n",
|
|||
|
" return m.hexdigest()[:10]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-b2a0e9ec110c03e1",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-bfeafe88c3a0be38",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Let's get started by making a sample dataframe with fake data:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-710e679d69e83ae2",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>number</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>blue</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>blue</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>red</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>234</td>\n",
|
|||
|
" <td>red</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>red</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>blue</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>6</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>blue</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>7</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>red</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>8</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>green</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>9</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>yellow</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" number color\n",
|
|||
|
"0 1 blue\n",
|
|||
|
"1 2 blue\n",
|
|||
|
"2 3 red\n",
|
|||
|
"3 234 red\n",
|
|||
|
"4 2 red\n",
|
|||
|
"5 3 blue\n",
|
|||
|
"6 2 blue\n",
|
|||
|
"7 2 red\n",
|
|||
|
"8 1 green\n",
|
|||
|
"9 2 yellow"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"sample_df = pd.DataFrame({'number':[1,2,3,234,2,3,2,2,1,2], 'color':['blue','blue','red','red','red','blue','blue','red','green','yellow']})\n",
|
|||
|
"display(sample_df)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-0320bf7579e7a5e0",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q1 Create a Series named `condition` which is true for all rows where the color is red"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-c57131439b7d2882",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"condition = sample_df[\"color\"]==\"red\""
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-c53eedea62c243e1",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(condition)=='28612bf01a'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-ac32ae211af0e86d",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q2 Create a new `DataFrame` named `red_sample_df` which contains only the rows with red color from `sample_df`.\n",
|
|||
|
"\n",
|
|||
|
"Hint: use your `condition` Series from above."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-c4be64a1d42e707d",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"red_sample_df = sample_df[ condition ]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-3c7eee81fa01c6e7",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(red_sample_df)=='354bd7ec89'\n",
|
|||
|
"assert ads_hash(sample_df)=='21692a4d62'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-98dc0525f2068315",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Now let's use the DataFrame `.groupby()` method to find the mean value of `number` for each `color`. (Hint: this will be useful later in this exercise.)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-43c0e1a1816c47a2",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"blue 2.0\n",
|
|||
|
"green 1.0\n",
|
|||
|
"red 60.25\n",
|
|||
|
"yellow 2.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for color, gdf in sample_df.groupby('color'):\n",
|
|||
|
" mean_number = gdf[\"number\"].mean()\n",
|
|||
|
" print(f\"{color} {mean_number}\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-9f757e6f5f690e81",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Using Pandas to work with real data\n",
|
|||
|
"\n",
|
|||
|
"<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABH0AAAKYCAYAAADuT4S/AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAAhdEVYdENyZWF0aW9uIFRpbWUAMjAyMDowNzoxNyAwNjoyNzo1MMKOQsIAAP94SURBVHhe7N0HYNRk/wfwb3vdLR2UtpRV9irDAaL8VWQ4UBDECSIucKHoq+IeKG7U1wE4UFEcqCDKEFGWOPBFUARk700p0EHpHv/8csld7rjZ3nUc3w/G5nJJLnny5Lk8v3vyJKhCASIiIiIiIiIiCijB2l8iIiIiIiIiIgogDPoQEREREREREQUgBn2IiIiIiIiIiAIQgz5ERERERERERAGIQR8iIiIiIiIiogDEoA8RERERERERUQBi0IeIiIiIiIiIKAAx6ENEREREREREFIAY9CEiIiIiIiIiCkBBFQptvNrJRwcFBaGk7Dh+3nsHMrN2IRihkA2KT4hBj+QJqB/VAeXlZQgONpkXIiIiIiIiIiIit6ol6FNUWoTy8nLl05QXyqcFBwfDFGSCKdikBn2KSrPw68E7cChzl/J+sDItGLHxEeiR/DqSY7oCZRX4ffvv+HXrLwhR/pWrYSEgOjoa15x5LZLqJVkCSEREREREREREVA1Bn8PHD+ORGQ8h63g2QoJNCA4KRmFZIS7p1B939r5LvcGsuDQHv6hBnx3K+yHKUkGIjY/EWUmvmoM+JRWY9PMkfL3uS4QhHBXKv9LSUtRPrI9nL34OnZp0QllFqbIq7W41LbgkwSMiIiIiIiIiolORT6MiFRXllqG8vFSdVlJagsy8TBzOzVADQBnK3wPZB3As7yhQrs6iBnFEkGxORZD6Vx03iAyLBIqVDS5Vpit/Q8pDUF5YjhCTBIkAkxosMrcS0lsLERERERERERGdqnwaGZFAS0WFrFKCLuZgjNxyZVL+hQaHIkSZpv4NVv6GhKrvC2lsFBpegTIUIDi0VP1rCitFeUWZNofc4VWmvC63GdS4jsSLlPe2Z8/Evxkf4N+DU7HhyBQcyPkfSovVRYmIiIiIiIiITjk+C/qUlwFbMr/F3/vfUIa3senIp0CFNOUJgslkQlBwkDrIrVfSlkf+yX8ixBSOiLJWSI4+EwlhXZAUfQbiTJ0RERJnnkECR0EmNTgkfQPpg6hQ+/EpRWbQbKzNnIhN2R9gU85k7M35GSUM+hARERERERHRKcp3QZ9S4FDhYmzNnYqtOZ9gb8F3QJDc6lWBClM5SiukBY/5X3BYMAqKC4AS87IhwVHonvw8Lmr9Ifq1fh8Xtn4PPZLeQFxEK7VFj7IIcotyER0VjejIaMTE1ENEeATq1YtVg0ESPSo6oexKSRTKisJRUhCG0OBoiRXVWpIuZWVKavhx0INj6jFQhtrA0Xb6c9D33x8cfZ4ngx6w9LWq5ClfppGj9Xs71BRH2+LLwXg++itfEhERERER6XzWkXNJEfDLnvtw8Pj/EBwUhrioxri03TRk5eXhlQUvIis3S31al3ycPL0rNDQMl3a9DBe1uwgIUSpbFWVawx89UmOuFJmCQ/DzxqXYcGgDLu86SO3/R+aQjQ4NDUGDqCQguBi/7r8dBw5vU+aPQIXpBNomDEd6whhERKsrI40ecJDb7k61p51JfpLhVNx3qr1O5XOSiIiIiIj8y6dBn+UZo7E3+2cEIQzx0c1wcYsvERIcKbVt86DWZ8yVmnJUYOrvH0GeuHXz/92iTz7J6t1/Y87qOXj0sscQFhqmTbVVXJ6HNfljsGvfeoSHRiMoNB/JuAJd6j+MyBhtplrm6NGj+N///oeSklIlaXzX8kOSOSwsDLGx9RATE4O4uHgkJyehXr162hxWcuilwim331WHkpISLFu2DLm5uUrlVu+Qyff0NIiLi0NCQjxSU1ORmJiovWulB4Aq48SJE1i6dKl6/K
xc7498Vnl5BZKSknDeeedW6fON5BhKIPXw4cNYseJP5bUnrYmC1M+XY19cXIxmzZqiR48e2nuVJ+tcsmQJsrOzlXWHKK89z9uyD8XFJUp+TUavXudrU/1PPw5Hjx5T0u9/6jb445yUczA+Pk49L2UfHZ2TQj+eREREREREVVWloI+0urEsXhGMnce+R27RXgQHmRARGoc2DYYo00NQViYVXvNs8lcWCZF+npXp89bPw+4jOzGgy+XqutQnfynrDQsJxdETR/HtylkY2/9hxLiI3pRXlGL/iUU4rsyvPvI9uARxYR3UPoKM8Qy9cleT5BYPqWhLwOfWW0eqARDfB12ClEqjuS+l8PBwtcJZv359dOnSRRk646yzzkL79u1tKpayXfLaH+mjp/vx48dx+eWDsHHjRkRGRqrT/Un2X4aIiAg10NK9e3dcdNGFOP/885X8Z+5ovPIV7AoMHz4CCxcuRHR0tJp+nqSdBL6kwv/ZZ58hPb2jTyr4+jpGjhyJH35YoB5vmebJ9pSWliI2NhaTJk1C794XVPoc0fP1unXrcP31w3Hs2DFLGnujqKgIrVq1wpw5s9U8Wx3nrPWcXIFRo0YhKyurUtvumvmclPXKZ0VGRiAxsYF6Tp5++mk444wz0KFDB2VfzfMKfbuIiIiIiIgqy2ctfYyUeptSWVMq3Upddmf2N8gsWonykhClmlyuvhEbF4e2cbcgKjRZne+Fuc9h1eE/EVQcjIogpZKn/CsuLUaTlKZ44bIXUD86UZ1PfnyXBiJ6HVCp16qc1pmLlYW0mWX5IMcNhaqVXpFbtWqVWjkuLCxUK+i+JodVH6Qlh/65MiQkJCAlJQX9+vXDkCFXID09XVvKPxVN2QY96HPJJf2xY8cOhIaGqtP9yX7/JTAiAZpmzZrh5ptvxtCh16nBJ31eT4ML+rzSMuSiiy7CgQMH1ECLfIardchyUunPyzuhpH1ffPnldEtwxtPPtqcHfKTV0U033WzZZ3f0bZH8d/vtt+GZZ8Yp0+R0qdx26MaNewaTJ09GVFSUGlDyZn36NpWWluHFF5/HDTfcYNk/fzKekzfcMEJtxeVJGnpLPzYySPBPBvlc2WcJvDVq1Aj9+/fHoEGD0KFDe22pqgQmiYiIiIjoVFepoI8sIpU5CcxsO7wVpUqlSZSXl6r97LROaoPw0EhIX83/O/AwMssWobQoVA3YlCgVwaSk+jgz7r9IjE5X1/X2T29hzvrZQDGUdZWqff+cKDqBHq174M2hb6HCBOQU7MDGvNdx9Ij8Cm9SA0Ah4eVIjbgQ7RNvgilU3walgqR80IrtK7BF2TaJMwkJJnVM7YjuLc5S9to8rSboFcyVK1di6NBhfgv6yPHRj5Ne8dYrjlIZLygoUF9LCxhp/TNy5Ci1FYxOX9YX9HVJ0Oeiiy7G9u3b/R70sd92eS37K2mdn5+vHgNp7fTEE0/g4osvUufxZp/NFfEgfPrp53jooYe0YEWp+hnO9kvWLcvJZ8vw1ltvqhV887q8r9Qbt/fiiy/BX3/9pQax3AVbZDn5PLmNqX37tliwYIEaDPNm/4305SS41rt3H2zZskUNgjlLB1ckXaT12wUXXIDZs7/TpvqXMegzbNj1fgv6SBrpaaWns55fZBv0c1JaOHXrdiZGjboNffr0VucT+rJERERERESeqtTPxxXKP3E07yheWfYyHpjxHzw2+xE88t0jeO7H53H0RJb6vgRXTMFKJbQ4FOWl4erfoHLltQSA1KduaRVhB/1nyPSSshJ1C6XlT2FpNg4fX4usgq04nLsBR/I2Y9/Rv5GVvx2lymxBMFkHZd2L1i/G+8vfw3vL3sWUX97Hu7+/g2Wblik1PO0DApxeaZWAglQoZZBggB4QkEq+DDk5OVi4cJF6q9mgQYPVW1yEelyUZesq2X8ZZB/0QfZdpkkrFLnla8OGDbj99tsxbtw4dRlvKtQyryTxDTcMVwMUUmGXQJarNJP39M+QwMLrr/9XDf
q5ChS5oi8zZcoHWL16tSXg42x9+jTZBhnCwkLx6KOPqPnAuG3e0te7dOnP2LNnj5q2ldkfIflUtmfz5s3YtGmTOs1Vm
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-927793ccea2a12d7",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Get the CSV file and upload it to Jupyter\n",
|
|||
|
"\n",
|
|||
|
"* Go to https://datadryad.org/stash/dataset/doi:10.5061/dryad.vb06d3k and download the data by clicking \"Download Dataset\".\n",
|
|||
|
"  * Check that you now have a file called `doi_10.5061_dryad.vb06d3k__v1.zip` about 99KB in size in your Downloads.\n",
|
|||
|
" * Unzip this file, which should create:\n",
|
|||
|
" - `spectra.csv`\n",
|
|||
|
" - `data_onevalueperbee.csv`\n",
|
|||
|
" - `data_visitationsequence.csv`\n",
|
|||
|
"  * We are going to focus on the file `data_onevalueperbee.csv`. Upload this file to your Jupyter server using the `Upload` button in the file view. Upload it into the same directory as this `.ipynb` file. Alternatively, if you are running this on your own computer, you can copy this `.csv` file into the directory with your `.ipynb` file.\n",
|
|||
|
"\n",
|
|||
|
"## Look at the file\n",
|
|||
|
"\n",
|
|||
|
"As we have covered, CSV stands for \"Comma separated values\" and is one of the most widely used file formats for scientific data. CSV files are typically like tables, potentially with column names in the first line. The lines of the file are then rows of the table. Spreadsheet programs can open most CSV files, sometimes with a few settings being required to deal with various CSV dialects.\n",
|
|||
|
"\n",
|
|||
|
"Let's take a look at this CSV file. This is mostly a repetition of what we recently looked at. We open the CSV file and read the first few lines."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-0964768c19f2b15e",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"b'bee;nest;treatment;tstartforaging;logtstartforaging;maxminusstarttime;sumfeedingtime;logsumfeedingtime;numberofvisitations;sumdistcovered;meanvisitduration;revisitations;countyellow;countorange;countblue;visitblue;visitorange;visityellow;allcolorsvisited;averagedistance;meanspeed;sumflowersvisited;visitspatchA;visitspatchB;visitspatchC;patchesvisited;allpatchesvisited;visitsperpatchrounded\\r\\n'\n",
|
|||
|
"b'201531004;3;control;815;2.911157609;1965;80;1.903089987;21;1287.688282;3.80952381;4;14;4;3;1;1;1;1;80.4805176;2.767026543;12;6;13;2;3;1;7\\r\\n'\n",
|
|||
|
"b'201531005;3;control;108;2.033423755;;1;0;1;;1;0;1;0;0;0;0;1;0;;;1;1;0;0;1;0;0\\r\\n'\n",
|
|||
|
"b'201531006;3;control;90;1.954242509;788;136;2.133538908;56;2787.280939;2.428571429;9;41;10;5;1;1;1;1;60.5930639;4.572772117;21;9;25;22;3;1;19\\r\\n'\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Open the file in Python. The variable `fd` will be assigned the open file object.\n",
|
|||
|
"fd = open('data_onevalueperbee.csv', mode=\"rb\")\n",
|
|||
|
"\n",
|
|||
|
"# Iterate over the lines in the file, also creating an integer called `line_number` via the\n",
|
|||
|
"# use of the `enumerate()` function.\n",
|
|||
|
"for (line_number, line) in enumerate(fd.readlines()):\n",
|
|||
|
" # Print each line.\n",
|
|||
|
" print(line)\n",
|
|||
|
" if line_number >= 3:\n",
|
|||
|
" # Stop after a few lines\n",
|
|||
|
" break\n",
|
|||
|
"# Close the open file object.\n",
|
|||
|
"fd.close()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-5dd558e28f33dab8",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"So, note that instead of a comma (`,`) separating the columns, we have a semicolon (`;`). So we will need to use this when calling the Pandas `read_csv()` function. Thus, we must use the `sep=';'` keyword argument. Also, this CSV file has some slightly unusual characters in it. From trial and error, I have learned that it must be opened with an `encoding='latin_1'` keyword argument to `read_csv()`.\n",
|
|||
|
"\n",
|
|||
|
"## Q3. Read the file `data_onevalueperbee.csv` into a dataframe named `df`.\n",
|
|||
|
"\n",
|
|||
|
"Hint: use the `read_csv` function not only with the filename as the required first positional argument, but also with the keyword arguments described above."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-107a3973d184c577",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"df = pd.read_csv('data_onevalueperbee.csv', sep=';', encoding='latin_1')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-66fcba98183cb481",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(df)=='15994f5b0e'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-bdfe656f6d2cf933",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>bee</th>\n",
|
|||
|
" <th>nest</th>\n",
|
|||
|
" <th>treatment</th>\n",
|
|||
|
" <th>tstartforaging</th>\n",
|
|||
|
" <th>logtstartforaging</th>\n",
|
|||
|
" <th>maxminusstarttime</th>\n",
|
|||
|
" <th>sumfeedingtime</th>\n",
|
|||
|
" <th>logsumfeedingtime</th>\n",
|
|||
|
" <th>numberofvisitations</th>\n",
|
|||
|
" <th>sumdistcovered</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>allcolorsvisited</th>\n",
|
|||
|
" <th>averagedistance</th>\n",
|
|||
|
" <th>meanspeed</th>\n",
|
|||
|
" <th>sumflowersvisited</th>\n",
|
|||
|
" <th>visitspatchA</th>\n",
|
|||
|
" <th>visitspatchB</th>\n",
|
|||
|
" <th>visitspatchC</th>\n",
|
|||
|
" <th>patchesvisited</th>\n",
|
|||
|
" <th>allpatchesvisited</th>\n",
|
|||
|
" <th>visitsperpatchrounded</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>201531004</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>control</td>\n",
|
|||
|
" <td>815</td>\n",
|
|||
|
" <td>2.911158</td>\n",
|
|||
|
" <td>1965.0</td>\n",
|
|||
|
" <td>80</td>\n",
|
|||
|
" <td>1.903090</td>\n",
|
|||
|
" <td>21</td>\n",
|
|||
|
" <td>1287.688282</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>80.480518</td>\n",
|
|||
|
" <td>2.767027</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>7</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>201531005</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>control</td>\n",
|
|||
|
" <td>108</td>\n",
|
|||
|
" <td>2.033424</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>201531006</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>control</td>\n",
|
|||
|
" <td>90</td>\n",
|
|||
|
" <td>1.954243</td>\n",
|
|||
|
" <td>788.0</td>\n",
|
|||
|
" <td>136</td>\n",
|
|||
|
" <td>2.133539</td>\n",
|
|||
|
" <td>56</td>\n",
|
|||
|
" <td>2787.280939</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>60.593064</td>\n",
|
|||
|
" <td>4.572772</td>\n",
|
|||
|
" <td>21</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" <td>25</td>\n",
|
|||
|
" <td>22</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>19</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>201531101</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>control</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>320.0</td>\n",
|
|||
|
" <td>61</td>\n",
|
|||
|
" <td>1.785330</td>\n",
|
|||
|
" <td>16</td>\n",
|
|||
|
" <td>578.355534</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>72.294442</td>\n",
|
|||
|
" <td>3.651351</td>\n",
|
|||
|
" <td>7</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>15</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>201531102</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>control</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>656.0</td>\n",
|
|||
|
" <td>258</td>\n",
|
|||
|
" <td>2.411620</td>\n",
|
|||
|
" <td>49</td>\n",
|
|||
|
" <td>2845.622453</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>71.140561</td>\n",
|
|||
|
" <td>6.307130</td>\n",
|
|||
|
" <td>18</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>24</td>\n",
|
|||
|
" <td>21</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>16</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 28 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" bee nest treatment tstartforaging logtstartforaging \\\n",
|
|||
|
"0 201531004 3 control 815 2.911158 \n",
|
|||
|
"1 201531005 3 control 108 2.033424 \n",
|
|||
|
"2 201531006 3 control 90 1.954243 \n",
|
|||
|
"3 201531101 3 control 1 0.000000 \n",
|
|||
|
"4 201531102 3 control 0 0.000000 \n",
|
|||
|
"\n",
|
|||
|
" maxminusstarttime sumfeedingtime logsumfeedingtime numberofvisitations \\\n",
|
|||
|
"0 1965.0 80 1.903090 21 \n",
|
|||
|
"1 NaN 1 0.000000 1 \n",
|
|||
|
"2 788.0 136 2.133539 56 \n",
|
|||
|
"3 320.0 61 1.785330 16 \n",
|
|||
|
"4 656.0 258 2.411620 49 \n",
|
|||
|
"\n",
|
|||
|
" sumdistcovered ... allcolorsvisited averagedistance meanspeed \\\n",
|
|||
|
"0 1287.688282 ... 1 80.480518 2.767027 \n",
|
|||
|
"1 NaN ... 0 NaN NaN \n",
|
|||
|
"2 2787.280939 ... 1 60.593064 4.572772 \n",
|
|||
|
"3 578.355534 ... 1 72.294442 3.651351 \n",
|
|||
|
"4 2845.622453 ... 1 71.140561 6.307130 \n",
|
|||
|
"\n",
|
|||
|
" sumflowersvisited visitspatchA visitspatchB visitspatchC \\\n",
|
|||
|
"0 12 6 13 2 \n",
|
|||
|
"1 1 1 0 0 \n",
|
|||
|
"2 21 9 25 22 \n",
|
|||
|
"3 7 0 15 1 \n",
|
|||
|
"4 18 4 24 21 \n",
|
|||
|
"\n",
|
|||
|
" patchesvisited allpatchesvisited visitsperpatchrounded \n",
|
|||
|
"0 3 1 7 \n",
|
|||
|
"1 1 0 0 \n",
|
|||
|
"2 3 1 19 \n",
|
|||
|
"3 2 0 5 \n",
|
|||
|
"4 3 1 16 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 28 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Let's look at the first few lines of the file with the pandas DataFrame `.head()` method:\n",
|
|||
|
"\n",
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-85664fce05868fd1",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q4. How many rows of data are in the dataframe you read from the file `data_onevalueperbee.csv`? Put the answer in a variable `num_rows`.\n",
|
|||
|
"\n",
|
|||
|
"Hint: you can use `len(df)` to calculate the number of rows in the DataFrame `df`."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-e10d9bb147d32ad5",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"159\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Type your answer here and then run this and the following cell.\n",
|
|||
|
"num_rows = len(df)\n",
|
|||
|
"print(num_rows)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-227f7cff2325cb9a",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(num_rows)=='ff2ccb6ba4'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-6b133b10967bef7a",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q5. What are the unique values of the `nest` column?\n",
|
|||
|
"\n",
|
|||
|
"Put your answer in the variable `unique_nests`. Hint: use the `.unique()` method on the Series for the `nest` column."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-e0b36658308de988",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[ 3 4 6 8 9 10]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"unique_nests = df['nest'].unique()\n",
|
|||
|
"print(unique_nests)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-208034c33360f85d",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(unique_nests)=='59e69ce283'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-7802496c83a4d50a",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q6. What are the unique values of the `treatment` column?\n",
|
|||
|
"\n",
|
|||
|
"Put your answer in the variable `unique_treatments`."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-7a258af7f499be42",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"['control' 'imidacloprid']\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"unique_treatments = df['treatment'].unique()\n",
|
|||
|
"print(unique_treatments)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-96c51bcd8e5a3ac3",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(unique_treatments) == '670c49c25a'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Now let's check which nests are in each treatment:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-2c145981d446e40d",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"control\n",
|
|||
|
"[3 4 8]\n",
|
|||
|
"\n",
|
|||
|
"imidacloprid\n",
|
|||
|
"[ 6 9 10]\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for treatment, gdf in df.groupby('treatment'):\n",
|
|||
|
" print(treatment)\n",
|
|||
|
" print(gdf['nest'].unique())\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-e5dbccbf3a349a88",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q7. Now make a `seaborn` `boxplot` of the time to start foraging (`tstartforaging` in the CSV file) on the `y` axis, `nest` number on the `x` axis, and with the color (`hue`) being the `treatment`. Save the plot to a file named `Figure2a.png`.\n",
|
|||
|
"\n",
|
|||
|
"Your plot should look like: <img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde1xVdb7/8fdyI2xQ2YoGSKKZ2sXULDUDZ9K0vGV2O+UMilamZqXiJfs1Zyor0zRDKU+OdSobk5N10qZmwrLGLIdMNDmp0W0ikSOIGoIXLgrr94fjPm5BFPZi7w3r9Xw8eARrfff6fvYS22+/a63v1zBN0xQAAABso4m/CwAAAIBvEQABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALAZAiAAAIDNEAABAABshgAIAABgMwRAAAAAmyEAAgAA2AwBEAAAwGYIgAAAADZDAAQAALCZIH8X0JBVVlZq7969atGihQzD8Hc5AADgPJimqcOHDysmJkZNmthzLIwA6IW9e/cqNjbW32UAAIA62LNnj9q1a+fvMvyCAOiFFi1aSDr5CxQeHu7nagAAwPkoLi5WbGys+3PcjgiAXjh12Tc8PJwACABAA2Pn27fseeEbAADAxgiAAAAANkMABAAAsBnuAQQANGimaerEiROqqKjwdykIEA6HQ0FBQba+x+9cCIAAgAarvLxceXl5OnbsmL9LQYAJCwtT27ZtFRwc7O9SAhIBEADQIFVWVio7O1sOh0MxMTEKDg5mxAcyTVPl5eXav3+/srOz1aVLF9tO9lwTAiAAoEEqLy9XZWWlYmNjFRYW5u9yEEBCQ0PVtGlT7d69W+Xl5XI6nf4uKeAQiQEADRqjO6gOvxc14+wAaHDS09M1atQopaen+7sUAGiQCIAAGpTS0lIlJydr3759Sk5OVmlpqb9LAoAGhwAIoEFZtWqVDh48KEk6ePCgUlNT/VwRIA0YMEBJSUk+6WvFihVq2bKlT/o6H7/88osMw1BmZqa/S0EtEAABNBi5ublKTU2VaZqSTj7tl5qaqtzcXD9XBtTs1FyFQKAgAAJoEEzTVEpKylm3nwqFgK/dfffd2rhxo1JSUmQYhgzD0IoVK2QYhj766CP17t1bISEh+uKLLyRJH3zwgXr16iWn06mLL75YTz75pEc4TE5OVvfu3dWsWTPFxsbqgQce0JEjRyRJn332me655x4VFRW5+5ozZ44k6aKLLtLcuXM1duxYNW/eXB06dNBf/vIX7d+/X7fccouaN2+u7t27a+vWrR71p6en67rrrlNoaKhiY2M1depUHT161L3/oosu0rx583TvvfeqRYsWat++vV5++WX3/o4dO0qSrrrqKhmGoQEDBtTHaYbFCIAAGoScnBxlZGRUWe2hoqJCGRkZysnJ8VNlsLuUlBTFxcVpwoQJysvLU15enmJjYyVJs2fP1vz585WVlaUePXroo48+0pgxYzR16lR9++23Wr58uVasWKFnnnnGfbwmTZrohRde0M6dO/XGG2/o73//u2bPni1Jio+P15IlSxQeHu7ua9asWe
7XLl68WP369dP27dt10003KTExUWPHjtWYMWP09ddfq3Pnzho7dqz7H0w7duzQkCFDdPvtt+ubb77R6tWrtWnTJj300EMe7/H5559X7969tX37dj3wwAOaPHmyvvvuO0nSli1bJEmffPKJ8vLytGbNmvo72bCOiTorKioyJZlFRUX+LgVo9CorK81Zs2aZAwcONPv37+/+GjhwoPnwww+blZWV/i4RPlZSUmJ+++23ZklJib9LMfv3729OmzbN/fOGDRtMSeZ7773n0e63v/2tOW/ePI9tK1euNNu2bXvWY7/99ttm69at3T+//vrrpsvlqtKuQ4cO5pgxY9w/5+XlmZLMxx57zL3tyy+/NCWZeXl5pmmaZmJiojlx4kSP43zxxRdmkyZN3Of1zONWVlaakZGR5rJly0zTNM3s7GxTkrl9+/azvgd/qOn3g89v02QiaAANgmEYmjZtmsaNG1ftdlaAQCDq3bu3x8/btm1TRkaGx4hfRUWFSktLdezYMYWFhWnDhg2aN2+evv32WxUXF+vEiRMqLS3V0aNH1axZsxr769Gjh/v7qKgoSVL37t2rbCsoKFB0dLS2bdumn376SatWrXK3MU3TvcrK5ZdfXuW4hmEoOjpaBQUFtT0dCCAEQAANRrt27ZSQkKA333xTpmnKMAwlJCTowgsv9HdpQLXODGyVlZV68skndfvtt1dp63Q6tXv3bg0fPlz333+/nn76aUVERGjTpk0aP368jh8/fs7+mjZt6v7+1D+KqttWWVnp/u+kSZM0derUKsdq3759tcc9dZxTx0DDRAAE0KCMHj1aaWlpOnDggNq0aaOEhAR/lwQoODi4yv2p1bn66qv1/fffq3PnztXu37p1q06cOKHnn3/evZLF22+/Xae+zsfVV1+tXbt2nbWe8xEcHCxJltUE3+AhEAANitPp1IwZMxQVFaXp06ezxicCwkUXXaSvvvpKv/zyiw4cOHDW0bHHH39cf/7znzVnzhzt2rVLWVlZWr16tf74xz9Kkjp16qQTJ07oxRdf1M8//6yVK1fqT3/6U5W+jhw5ok8//VQHDhzQsWPH6lz3I488oi+//FIPPvigMjMz9eOPP+r999/XlClTzvsYkZGRCg0N1bp167Rv3z4VFRXVuR74DgEQQIMTHx+v1atXKz4+3t+lAJKkWbNmyeFwqGvXrrrgggvO+lT6kCFD9Ne//lXr169Xnz59dO211yo5OVkdOnSQJPXs2VPJyclasGCBunXrplWrVmn+/Pkex4iPj9f999+vUaNG6YILLtDChQvrXHePHj20ceNG/fjjj/rtb3+rq666So899pjatm173scICgrSCy+8oOXLlysmJka33HJLneuB7ximyeRZdVVcXCyXy6WioiKFh4f7uxwAsJXS0lJlZ2erY8eOjASjipp+P/j8ZgQQAADAdgiAAAAANkMABAAAsBkCIAAAgM0QAAEAAGyGAAgAAGAzBEAAAACbIQACAADYDAEQAADAZoL8XQAAAFaqqKiQLxe5MgxDDofDZ/352i+//KKOHTtq+/bt6tmzp7/LgUUIgACARqOiokK3/9udKir81Wd9ulpFaM1/vxNQIfCiiy5SUlKSkpKS/F0KAhQBEADQaJimqaLCX3X46rGS4YO7nMxK6es/+3TE0SoVFRUyDENNmnA3mB3xpw4AaHyMJlITH3zVMWRWVlZqwYIF6ty5s0JCQtS+fXs988wzkqQdO3Zo4MCBCg0NVevWrTVx4kQdOXLE/dq7775bt956qxYtWqS2bduqdevWevDBB3X8+HFJ0oABA7R7925Nnz5dhmHIMAxJ0ooVK9SyZUv99a9/VdeuXRUSEqLdu3ersrJSTz31lNq1a6eQkBD17NlT69at8/IPAIGOAAgAgI89+uijWrBggR577DF9++23Sk1NVVRUlI4dO6ahQ4eqVatWysjI0DvvvKNPPvlEDz30kMfrN2zYoH/+85/asGGD3njjDa1YsUIrVq
yQJK1Zs0bt2rXTU089pby8POXl5blfd+zYMc2fP1//+Z//qV27dikyMlIpKSl6/vnntWjRIn3zzTcaMmSIRo4cqR9//
|
|||
|
"\n",
|
|||
|
"Check the left panel of [Figure 2a](https://royalsocietypublishing.org/doi/10.1098/rspb.2018.0506#RSPB20180506F2) from the Lämsä et al. *Proc. Roy Soc B.* 2018 paper. Do you see any similarities or differences with your plot?\n",
|
|||
|
"\n",
|
|||
|
"Hint: import seaborn and call its `boxplot()` function with the keyword arguments `data`, `x`, `y`, and `hue` set to `df`, `'nest'`, `'tstartforaging'`, and `'treatment'`, respectively."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-df2e372b53d245fd",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Run this cell to import seaborn\n",
|
|||
|
"import seaborn as sns"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-208bdcdc93f0ff65",
|
|||
|
"locked": false,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='nest', ylabel='tstartforaging'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKJElEQVR4nO3de1wU9f4/8NeywHIRVrHYhUQEFS8oZtLRlQ54y3vmqZOeRNS8C6l4yY6JIgWaVl5KobASQ8rqmKXl8ZJfhQxURAlF00oETS4auKAiyO78/ujHnjbQWFiYvbyejwcP3ZnPzrxnwva1n/nMZySCIAggIiIismI2YhdAREREJDYGIiIiIrJ6DERERERk9RiIiIiIyOoxEBEREZHVYyAiIiIiq8dARERERFbPVuwCzIVWq8W1a9fg4uICiUQidjlERETUAIIgoKKiAp6enrCxuX8/EANRA127dg1eXl5il0FERESNcOXKFbRr1+6+6xmIGsjFxQXA7yfU1dVV5GqIiIioIcrLy+Hl5aX7HL8fBqIGqr1M5urqykBERERkZv5quAsHVRMREZHVYyAiIiIiq8dARERERFaPY4iIiMhsaLVaVFdXi10GmRA7OztIpdImb4eBiIiIzEJ1dTXy8vKg1WrFLoVMTOvWraFUKps0TyADERERmTxBEFBYWAipVAovL68HTrBH1kMQBNy5cwclJSUAAA8Pj0Zvi4GIiIhMXk1NDe7cuQNPT084OTmJXQ6ZEEdHRwBASUkJ3N3dG335jBGbiIhMnkajAQDY29uLXAmZotqQfO/evUZvg4GIiIjMBp8lSfUxxu8FL5kRUbPQaDTIyclBaWkp3NzcEBAQYJQ7QYiImgMDEREZXVpaGuLj41FUVKRbplQqER4ejuDgYBErIyKqHy+ZEZFRpaWlITo6Gr6+vti8eTP27t2LzZs3w9fXF9HR0UhLSxO7RCKiOhiIiMhoNBoN4uPjoVKpEBsbC39/fzg5OcHf3x+xsbFQqVRISEjQDZAlMkcDBgxAZGRki+xrypQpGDt2bIvsqyGSkpLQunVrsctoFgxERGQ0OTk5KCoqQmhoaJ15YmxsbBAaGorCwkLk5OSIVCFR8xMEATU1NWKXQQZiICIioyktLQUA+Pj41Lu+dnltOyJzM2XKFKSmpmLjxo2QSCSQSCRISkqCRCLB/v37ERgYCJlMhu+++w6CIGDt2rXw9fWFo6MjevXqhf/85z+6bWk0GkybNg0+Pj5wdHREly5dsHHjRt36lStXYtu2bfjqq690+zpy5AguX74MiUSCzz77DH//+9/h6OiIxx9/HBcvXkRmZiYCAwPRqlUrDB8+HNevX9erf+vWrejWrRscHBzQtWtXxMfH69bVbveLL77AwIED4eTkhF69eiEjIwMAcOTIEbzwwgtQq9W6elauXNm8J7wlCdQgarVaACCo1WqxSyEyWadOnRJCQkKEs2fP1rv+7NmzQkhIiHDq1KkWrozMXWVlpXDu3DmhsrJS1Dpu3rwpqFQqYcaMGUJhYaFQWFgofPvttwIAISAgQDhw4IDw888/Czdu3BBeeeUVoWvXrsK+ffuEX375Rdi6dasgk8mEI0eOCIIgCNXV1cKKFSuEEydOCJcuXRK2b98uODk5CZ9++qkgCIJQUVEhjBs3Thg+fLhuX1VVVUJeXp4AQLftc+fOCf369RMee+wxYcCAAcLRo0eFU6dOCZ06dRJmz56tqz0xMVHw8PAQdu7cKVy6dEnYuXOn4ObmJiQlJQmCIOht9+uvvxYuXLgg/POf/xS8vb2Fe/fuCVVVVcKGDRsEV1dXXT0VFRUt/x+hHg/6/Wjo5zcDUQMxEBH9tZqaGmH8+PHC0qVLBY1Go7dOo9EIS5cuFf71r38JNTU1IlVI5spUApEgCEJISIgwf/583evDhw8LAIQvv/xSt+zWrVuCg4ODkJ6ervfeadOmCc8///x9tx0eHi48++yzuteTJ0
8Wnn76ab02tcHl/fff1y375JNPBADCoUOHdMtWr14tdOnSRffay8tL+Pjjj/W29dprrwkqleq+283NzRUACOfPnxcEQRC2bt0qyOXy+9YvFmMEIt52T0RGI5VKER4ejujoaERFRSE0NBQ+Pj7Iy8tDSkoKMjIyEBMTw/mIyCIFBgbq/n7u3DncvXsXTz75pF6b6upq9O7dW/f63Xffxfvvv4/8/HxUVlaiuroajz76aIP2FxAQoPu7QqEAAPTs2VNvWe0zvq5fv44rV65g2rRpmDFjhq5NTU0N5HL5fbdb+2ywkpISdO3atUF1mSsGIiIyquDgYMTExCA+Ph4RERG65R4eHoiJieE8RGSxnJ2ddX/XarUAgG+++QaPPPKIXjuZTAYA+Oyzz7BgwQK89dZbUKlUcHFxwRtvvIHjx483aH92dna6v9fO1PznZbV11P65ZcsW9O3bV287f/6CUt92a99vyRiIiMjogoODERQUxJmqySLZ29v/5dQR3bt3h0wmQ0FBAUJCQupt891336F///4IDw/XLfvll18M3ldDKBQKPPLII7h06RJCQ0MbvR1j1WOKGIiIqFlIpVK9SwNElqJDhw44fvw4Ll++jFatWtXbe+Li4oLFixdjwYIF0Gq1eOKJJ1BeXo709HS0atUKkydPRqdOnfDRRx9h//798PHxQXJyMjIzM/Xu0uzQoQP279+PCxcuoG3btnUubxli5cqVmDdvHlxdXTFixAhUVVXh5MmTKCsrw8KFCxt87Ldu3cKhQ4fQq1cvODk56R6sau542z0REZEBFi9eDKlUiu7du+Phhx9GQUFBve1ee+01rFixAqtXr0a3bt0wbNgw7NmzRxd4Zs+ejWeeeQbjx49H37598dtvv+n1FgHAjBkz0KVLFwQGBuLhhx/G999/3+i6p0+fjvfffx9JSUno2bMnQkJCkJSUdN9pMurTv39/zJ49G+PHj8fDDz+MtWvXNroeUyMRBEEQuwhzUF5eDrlcDrVaDVdXV7HLISKyKnfv3kVeXh58fHzg4OAgdjlkYh70+9HQz2/2EBEREZHVYyAiIiIiq8dARERERFaPgYiIiIisHgMRERERWT0GIiIiIrJ6DERERERk9RiIiIiIyOrx0R1ERGS2iouLoVarW2x/crlc92R5S5SUlITIyEjcvHlT7FJaHAMRERGZpeLiYkwMm4R71VUttk87exm2J39kUqGoQ4cOiIyMRGRkpNilmDUGIiIiMktqtRr3qqtQ6RsCrUPjH3raUDZ31cClVKjVapMKRA2h0WggkUhgY8ORMvfDM0NERGZN6yCH1vmh5v9pZOjSarVYs2YNOnXqBJlMhvbt2yMuLg4AcObMGQwaNAiOjo5o27YtZs6ciVu3buneO2XKFIwdOxZvvvkmPDw80LZtW0RERODevXsAgAEDBiA/Px8LFiyARCKBRCIB8Pulr9atW+Prr79G9+7dIZPJkJ+fj7KyMkyaNAlt2rSBk5MTRowYgZ9++qmJ/wUsAwMRERFRM1q6dCnWrFmD5cuX49y5c/j444+hUChw584dDB8+HG3atEFmZiY+//xzfPvtt3jxxRf13n/48GH88ssvOHz4MLZt24akpCQkJSUBAL744gu0a9cOr776KgoLC1FYWKh73507d7B69Wq8//77yM3Nhbu7O6ZMmYKTJ09i9+7dyMjIgCAIGDlypC5gWTNeMiMiImomFRUV2LhxIzZt2oTJkycDADp27IgnnngCW7ZsQWVlJT766CM4OzsDADZt2oSnnnoKa9as0V2Wa9OmDTZt2gSpVIquXbti1KhROHToEGbMmAE3NzdIpVK4uLhAqVTq7fvevXuIj49Hr169AAA//fQTdu/eje+//x79+/cHAKSkpMDLywtffvklnnvuuZY6LSaJPURERETN5Pz586iqqsLgwYPrXderVy9dGAKAoKAgaLVaXLhwQbfM398fUqlU99rDwwMlJSV/uW97e3sEBATo7c/W1h
Z9+/bVLWvbti26dOmC8+fPG3xsloaBiIiIqJk4Ojred50gCLoxP3/2x+V2dnZ11mm12gbt+4/bEQTB4DqsCQMRERF
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Q7: time to start foraging per nest, coloured by treatment.\n",
"ax = sns.boxplot(data=df, x='nest', y='tstartforaging', hue='treatment')\n",
"# The question also asks for the plot to be written to disk, so save the Axes' parent figure.\n",
"ax.figure.savefig('Figure2a.png')\n",
"# Keep the Axes as the cell's last expression so the cell still displays its result.\n",
"ax"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-c9c67275053aadd0",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q8. Now, using a single line of code, call seaborn's `boxplot` function to make a plot that shows the duration of the foraging period on the `y` axis.\n",
|
|||
|
"\n",
|
|||
|
"This should be fairly similar to the second panel in [Figure 2a](https://royalsocietypublishing.org/doi/10.1098/rspb.2018.0506#RSPB20180506F2) from the Lämsä et al. Proc. Roy Soc B. 2018 paper.\n",
|
|||
|
"\n",
|
|||
|
"Hint: the duration of the foraging period is in the `'maxminusstarttime'` column."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-d6bb242d388543aa",
|
|||
|
"locked": false,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='nest', ylabel='maxminusstarttime'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJ9UlEQVR4nO3de1wU5f4H8M9y2wWFVVB2MQFBARXFTDqGesB73lLL0hIR00TDQryWaQEmalbeRUNLvGB2USs7pqlHofIOGt7S30kCSy6WuKBcXeb3h4c5baDhsjK7zOf9evHSfebZme9sBB+feeYZhSAIAoiIiIhkzErqAoiIiIikxkBEREREssdARERERLLHQERERESyx0BEREREssdARERERLLHQERERESyZyN1AZaisrIS165dg6OjIxQKhdTlEBERUS0IgoCioiK0aNECVlb3HgdiIKqla9euwd3dXeoyiIiIyAhXr15Fy5Yt77mdgaiWHB0dAdz9QJ2cnCSuhoiIiGqjsLAQ7u7u4u/xe2EgqqWqy2ROTk4MRERERBbm76a7cFI1ERERyR4DEREREckeAxERERHJHucQERGRxaisrER5ebnUZZAZsbW1hbW1dZ33w0BEREQWoby8HJmZmaisrJS6FDIzTZo0gVarrdM6gQxERERk9gRBQE5ODqytreHu7n7fBfZIPgRBQHFxMfLz8wEAbm5uRu+LgYiIiMzenTt3UFxcjBYtWsDBwUHqcsiM2NvbAwDy8/Ph6upq9OUzRmwiIjJ7er0eAGBnZydxJWSOqkJyRUWF0ftgICIiIovBZ0lSTUzxfcFLZg2EXq9HRkYGbty4AWdnZwQEBJhk1j0REZEcMBA1AKmpqUhISEBubq7YptVqERkZieDgYAkrIyIisgy8ZGbhUlNTERMTA29vb6xZswZ79uzBmjVr4O3tjZiYGKSmpkpdIhERkdljILJger0eCQkJCAoKwoIFC+Dv7w8HBwf4+/tjwYIFCAoKwtq1a8XJiEREVHc9e/ZEdHR0vRxr3LhxGD58eL0cqzaSkpLQpEkTqct4KBiILFhGRgZyc3MRGhpabU0OKysrhIaGIicnBxkZGRJVSEQkP4Ig4M6dO1KXQQ+IgciC3bhxAwDg5eVV4/aq9qp+RERUN+PGjUNKSgpWrFgBhUIBhUKBpKQkKBQK7Nu3D4GBgVAqlfjuu+8gCAKWLFkCb29v2Nvbo1OnTvj888/Ffen1ekyYMAFeXl6wt7eHn58fVqxYIW6PjY3Fpk2b8OWXX4rHOnz4MH755RcoFAp8+umn+Oc//wl7e3s8/vjjuHz5Mk6ePInAwEA0btwYAwYMwPXr1w3q37hxI9q1aweVSoW2bdsiISFB3Fa13507d6JXr15wcHBAp06dcPToUQDA4cOH8eKLL0Kn04n1xMbGPtwPvD4JVCs6nU4AIOh0OqlLEaWnpwshISHCuXPnatx+7tw5ISQkREhPT6/nyoiITKukpES4cOGCUFJSImkdN2/eFIKCgoSJEycKOTk5Qk5OjnDgwAEBgBAQECB8++23wn/+8x/h999/F9544w2hbdu2wt69e4Wff/5Z2Lhxo6BUKoXDhw8LgiAI5eXlwltvvSWcOHFCuHLlirB161bBwcFB+OSTTwRBEISioiJh5MiRwoABA8RjlZWVCZmZmQIAcd8XLlwQnnjiCeGxxx4TevbsKXz//fdCenq60KZNG2Hy5Mli7YmJiYKbm5uwY8cO4cqVK8KOHTsEZ2dnISkpSRAEwWC/X3/9tXDp0iXh2WefFTw9PYWKigqhrKxMWL58ueDk5CTWU1RUVP//EWpwv++P2v7+ZiCqJXMMRHfu3BFGjRolzJkzR9Dr9Qbb9Hq9MGfOHOH5558X7ty5I1GFRESmYS6BSBAEISQkRJg6dar4+tChQwIA4YsvvhDbbt26JahUKuHIkSMG750wYYLwwgsv3HPfkZGRwogRI8TX4eHhwrBhww
z6VAWXDRs2iG0ff/yxAEA4ePCg2LZo0SLBz89PfO3u7i5s27bNYF9vv/22EBQUdM/9nj9/XgAgXLx4URAEQdi4caOgVqvvWb9UTBGIeNu9BbO2tkZkZCRiYmIwb948hIaGwsvLC5mZmUhOTsbRo0cRFxfH9YiIzEBpaSmys7OlLgMA4OHhAZVKJXUZDU5gYKD49wsXLqC0tBT9+vUz6FNeXo7OnTuLr9etW4cNGzYgKysLJSUlKC8vx6OPPlqr4wUEBIh/12g0AICOHTsatFU94+v69eu4evUqJkyYgIkTJ4p97ty5A7Vafc/9Vj0bLD8/H23btq1VXZaKgcjCBQcHIy4uDgkJCZgyZYrY7ubmhri4OK5DRGQmsrOzERERIXUZAIDExET4+vpKXUaD06hRI/HvlZWVAIB//etfeOSRRwz6KZVKAMCnn36KadOm4f3330dQUBAcHR3x7rvv4vjx47U6nq2trfj3qpWa/9pWVUfVn+vXr0fXrl0N9vPXfzTXtN+q9zdkDEQNQHBwMLp3786VqonMmIeHBxITE41+f1ZWFuLj4zF37lx4enrWuRYynp2d3d8uZ9K+fXsolUpkZ2cjJCSkxj7fffcdunXrhsjISLHt559/fuBj1YZGo8EjjzyCK1euIDQ01Oj9mKoec8RA1EBYW1sbDMMSkXlRqVQmGZXx9PTk6I7EWrVqhePHj+OXX35B48aNaxw9cXR0xMyZMzFt2jRUVlaiR48eKCwsxJEjR9C4cWOEh4ejTZs22Lx5M/bt2wcvLy9s2bIFJ0+eNLhzuFWrVti3bx8uXboEFxeXape3HkRsbCyioqLg5OSEgQMHoqysDKdOnUJBQQGmT59e63O/desWDh48iE6dOsHBwUF8sKql4233RERED2DmzJmwtrZG+/bt0bx583vODXv77bfx1ltvYdGiRWjXrh2efPJJ7N69Www8kydPxjPPPINRo0aha9eu+OOPPwxGiwBg4sSJ8PPzQ2BgIJo3b44ffvjB6LpfeuklbNiwAUlJSejYsSNCQkKQlJR0z6VbatKtWzdMnjwZo0aNQvPmzbFkyRKj6zE3CkEQBKmLsASFhYVQq9XQ6XRwcnKSuhwikpnLly8jIiJCtvN/SktLkZmZCS8vL04Ip2ru9/1R29/fHCEiIiIi2WMgIiIiItljICIiIiLZYyAiIiIi2WMgIiIiItljICIiIiLZYyAiIiIi2WMgIiIiItnjozuIiMhi5eXlQafT1dvx1Gq1+GT5higpKQnR0dG4efOm1KXUO7MJRIsWLcIbb7yBqVOnYvny5QAAQRAQFxeHxMREFBQUoGvXrlizZg38/f3F95WVlWHmzJn4+OOPUVJSgj59+iAhIQEtW7YU+xQUFCAqKgpfffUVAGDo0KFYtWoVmjRpUp+nSEREJpSXl4cxYWNRUV5Wb8e0tVNi65bNZhWKWrVqhejoaERHR0tdikUzi0B08uRJJCYmIiAgwKB9yZIlWLp0KZKSkuDr64sFCxagX79+uHTpEhwdHQEA0dHR2L17N7Zv3w4XFxfMmDEDQ4YMQVpamvi099GjR+PXX3/F3r17AQAREREICwvD7t276/dEiYjIZHQ6HSrKy1DiHYJKlfEPPa0tq1IdcCUFOp3OrAJRbej1eigUClhZcabMvUj+ydy6dQuhoaFYv349mjZtKrYLgoDly5dj7ty5eOaZZ9ChQwds2rQJxcXF2LZtG4C7/zN8+OGHeP/999G3b1907twZW7duxdmzZ3HgwAEAwMWLF7F3715s2LABQUFBCAoKwvr16/H111/j0qVLkpwzERGZTqVKjcpGzR7+l5Ghq7KyEu+88w7atGkDpVIJDw8PxMfHAwDOnj2L3r17w97eHi4uLoiIiMCtW7fE944bNw7Dhw/He++9Bzc3N7i4uGDKlCmoqKgAAPTs2RNZWVmYNm0aFAoFFAoFgLuXvpo0aYKvv/4a7du3h1KpRFZWFgoKCjB27Fg0bdoUDg
4OGDhwIP7v//6vjv8FGgbJA9GUKVMwePBg9O3b16A9MzMTubm56N+/v9imVCoREhKCI0eOAADS0tJQUVFh0KdFixb
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Q8: duration of the foraging period (`maxminusstarttime`) per nest, coloured by treatment.\n",
"sns.boxplot(x='nest', y='maxminusstarttime', hue='treatment', data=df)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-b926439e7eb1c2b5",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q9. Make a dictionary called `mean_duration_of_foraging_by_treatment`. Fill this with items whose keys are treatment strings (`'control'` and `'imidacloprid'`) and whose values are the mean of all `'maxminusstarttime'` values for rows in which the `treatment` variable is the same as the key. \n",
|
|||
|
"\n",
|
|||
|
"Hint: use `groupby` to group on the `treatment` column and use the `.mean()` method on the `maxminusstarttime` Series within each group data frame. Store each mean as the value in your dictionary for the key."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-cdfad08c7777905d",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"control\n",
|
|||
|
"1394.4603174603174\n",
|
|||
|
"\n",
|
|||
|
"imidacloprid\n",
|
|||
|
"910.8152173913044\n",
|
|||
|
"\n",
|
|||
|
"{'control': 1394.4603174603174, 'imidacloprid': 910.8152173913044}\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Q9: map each treatment label to the mean of its `maxminusstarttime` values.\n",
"mean_duration_of_foraging_by_treatment = {}\n",
"\n",
"for treatment, gdf in df.groupby('treatment'):\n",
"    # Compute the group's mean once and reuse it for both the printout and the dict entry.\n",
"    mean_duration = gdf['maxminusstarttime'].mean()\n",
"    print(treatment)\n",
"    print(mean_duration)\n",
"    print()\n",
"    mean_duration_of_foraging_by_treatment[treatment] = mean_duration\n",
"\n",
"print(mean_duration_of_foraging_by_treatment)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-ad0c60eabc04c1b7",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash([(k,int(round(v))) for (k,v) in mean_duration_of_foraging_by_treatment.items()]) == '14fe49a61a'"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-6718f15155ae17c2",
|
|||
|
"locked": true,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Q10. Make a dictionary `num_unique_bees_per_nest` which has key-value pairs of nest number and the number of unique bees from that nest.\n",
|
|||
|
"\n",
|
|||
|
"Hint: use `.groupby` on the `'nest'` column."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": false,
|
|||
|
"grade_id": "cell-0df074ae6ba331e7",
|
|||
|
"locked": false,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": true,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{3: 8, 4: 23, 6: 37, 8: 34, 9: 33, 10: 24}\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Q10: map each nest number to the number of distinct values in that nest's `bee` column.\n",
"num_unique_bees_per_nest = {\n",
"    nest: len(gdf['bee'].unique())\n",
"    for nest, gdf in df.groupby('nest')\n",
"}\n",
"print(num_unique_bees_per_nest)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {
|
|||
|
"nbgrader": {
|
|||
|
"grade": true,
|
|||
|
"grade_id": "cell-9fe3a1593835ed57",
|
|||
|
"locked": true,
|
|||
|
"points": 1,
|
|||
|
"schema_version": 3,
|
|||
|
"solution": false,
|
|||
|
"task": false
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# If this runs without error, it means the answer in your previous cell was correct.\n",
|
|||
|
"assert ads_hash(num_unique_bees_per_nest)=='ddc47911c0'"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"celltoolbar": "Create Assignment",
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.10"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 4
|
|||
|
}
|