|
15 | 15 | "cell_type": "code", |
16 | 16 | "execution_count": 1, |
17 | 17 | "id": "b40d877a-7429-463b-9443-1f423d36121a", |
18 | | - "metadata": {}, |
| 18 | + "metadata": { |
| 19 | + "execution": { |
| 20 | + "iopub.execute_input": "2026-03-02T16:52:57.599573Z", |
| 21 | + "iopub.status.busy": "2026-03-02T16:52:57.599269Z", |
| 22 | + "iopub.status.idle": "2026-03-02T16:52:57.810704Z", |
| 23 | + "shell.execute_reply": "2026-03-02T16:52:57.810081Z", |
| 24 | + "shell.execute_reply.started": "2026-03-02T16:52:57.599551Z" |
| 25 | + } |
| 26 | + }, |
19 | 27 | "outputs": [], |
20 | 28 | "source": [ |
21 | 29 | "import pandas as pd" |
|
25 | 33 | "cell_type": "code", |
26 | 34 | "execution_count": 2, |
27 | 35 | "id": "c5fca3df-d2c0-47a5-81e7-1ffa864bb90a", |
28 | | - "metadata": {}, |
| 36 | + "metadata": { |
| 37 | + "execution": { |
| 38 | + "iopub.execute_input": "2026-03-02T16:52:57.811124Z", |
| 39 | + "iopub.status.busy": "2026-03-02T16:52:57.810993Z", |
| 40 | + "iopub.status.idle": "2026-03-02T16:52:57.961789Z", |
| 41 | + "shell.execute_reply": "2026-03-02T16:52:57.961117Z", |
| 42 | + "shell.execute_reply.started": "2026-03-02T16:52:57.811115Z" |
| 43 | + } |
| 44 | + }, |
29 | 45 | "outputs": [], |
30 | 46 | "source": [ |
31 | 47 | "customers = pd.read_csv(\n", |
|
34 | 50 | ")" |
35 | 51 | ] |
36 | 52 | }, |
37 | | - { |
38 | | - "cell_type": "code", |
39 | | - "execution_count": null, |
40 | | - "id": "c7bd864d-3e9d-4711-8003-2cd2dd4596b3", |
41 | | - "metadata": {}, |
42 | | - "outputs": [], |
43 | | - "source": [] |
44 | | - }, |
45 | 53 | { |
46 | 54 | "cell_type": "markdown", |
47 | 55 | "id": "67b62ec8-4c31-4e77-b46d-32364b01a79b", |
|
56 | 64 | "execution_count": 3, |
57 | 65 | "id": "c7cf08e8-bebd-47f7-824d-60f16c86c85b", |
58 | 66 | "metadata": { |
| 67 | + "execution": { |
| 68 | + "iopub.execute_input": "2026-03-02T16:52:57.962213Z", |
| 69 | + "iopub.status.busy": "2026-03-02T16:52:57.962124Z", |
| 70 | + "iopub.status.idle": "2026-03-02T16:52:57.971149Z", |
| 71 | + "shell.execute_reply": "2026-03-02T16:52:57.970648Z", |
| 72 | + "shell.execute_reply.started": "2026-03-02T16:52:57.962204Z" |
| 73 | + }, |
59 | 74 | "scrolled": true |
60 | 75 | }, |
61 | 76 | "outputs": [ |
|
293 | 308 | }, |
294 | 309 | { |
295 | 310 | "cell_type": "code", |
296 | | - "execution_count": 5, |
| 311 | + "execution_count": 4, |
297 | 312 | "id": "578db81c-f9ad-47c1-bf1f-c4183e6498f4", |
298 | | - "metadata": {}, |
| 313 | + "metadata": { |
| 314 | + "execution": { |
| 315 | + "iopub.execute_input": "2026-03-02T16:52:57.971598Z", |
| 316 | + "iopub.status.busy": "2026-03-02T16:52:57.971504Z", |
| 317 | + "iopub.status.idle": "2026-03-02T16:52:57.974641Z", |
| 318 | + "shell.execute_reply": "2026-03-02T16:52:57.974097Z", |
| 319 | + "shell.execute_reply.started": "2026-03-02T16:52:57.971589Z" |
| 320 | + } |
| 321 | + }, |
299 | 322 | "outputs": [ |
300 | 323 | { |
301 | 324 | "data": { |
302 | 325 | "text/plain": [ |
303 | | - "name object\n", |
304 | | - "job object\n", |
305 | | - "company object\n", |
306 | | - "street_address object\n", |
307 | | - "city object\n", |
308 | | - "state object\n", |
309 | | - "email object\n", |
310 | | - "user_name object\n", |
| 326 | + "name str\n", |
| 327 | + "job str\n", |
| 328 | + "company str\n", |
| 329 | + "street_address str\n", |
| 330 | + "city str\n", |
| 331 | + "state str\n", |
| 332 | + "email str\n", |
| 333 | + "user_name str\n", |
311 | 334 | "dtype: object" |
312 | 335 | ] |
313 | 336 | }, |
314 | | - "execution_count": 5, |
| 337 | + "execution_count": 4, |
315 | 338 | "metadata": {}, |
316 | 339 | "output_type": "execute_result" |
317 | 340 | } |
318 | 341 | ], |
319 | 342 | "source": [ |
320 | | - "customers.dtypes\n" |
| 343 | + "customers.dtypes" |
321 | 344 | ] |
322 | 345 | }, |
323 | 346 | { |
|
338 | 361 | }, |
339 | 362 | { |
340 | 363 | "cell_type": "code", |
341 | | - "execution_count": 6, |
| 364 | + "execution_count": 5, |
342 | 365 | "id": "0540e3f9-4db0-4b91-9f7f-878f53dc1a44", |
343 | 366 | "metadata": { |
| 367 | + "execution": { |
| 368 | + "iopub.execute_input": "2026-03-02T16:52:57.974962Z", |
| 369 | + "iopub.status.busy": "2026-03-02T16:52:57.974897Z", |
| 370 | + "iopub.status.idle": "2026-03-02T16:52:57.977799Z", |
| 371 | + "shell.execute_reply": "2026-03-02T16:52:57.977233Z", |
| 372 | + "shell.execute_reply.started": "2026-03-02T16:52:57.974955Z" |
| 373 | + }, |
344 | 374 | "scrolled": true |
345 | 375 | }, |
346 | 376 | "outputs": [ |
|
383 | 413 | }, |
384 | 414 | { |
385 | 415 | "cell_type": "code", |
386 | | - "execution_count": 8, |
| 416 | + "execution_count": 6, |
387 | 417 | "id": "1ae2365b-d980-409f-9f36-e2cee7525a35", |
388 | | - "metadata": {}, |
| 418 | + "metadata": { |
| 419 | + "execution": { |
| 420 | + "iopub.execute_input": "2026-03-02T16:52:57.978137Z", |
| 421 | + "iopub.status.busy": "2026-03-02T16:52:57.978075Z", |
| 422 | + "iopub.status.idle": "2026-03-02T16:52:57.982252Z", |
| 423 | + "shell.execute_reply": "2026-03-02T16:52:57.981695Z", |
| 424 | + "shell.execute_reply.started": "2026-03-02T16:52:57.978130Z" |
| 425 | + } |
| 426 | + }, |
389 | 427 | "outputs": [ |
390 | 428 | { |
391 | 429 | "data": { |
|
404 | 442 | "Length: 2080, dtype: bool" |
405 | 443 | ] |
406 | 444 | }, |
407 | | - "execution_count": 8, |
| 445 | + "execution_count": 6, |
408 | 446 | "metadata": {}, |
409 | 447 | "output_type": "execute_result" |
410 | 448 | } |
411 | 449 | ], |
412 | 450 | "source": [ |
413 | | - "customers.duplicated()\n" |
| 451 | + "customers.duplicated()" |
414 | 452 | ] |
415 | 453 | }, |
416 | 454 | { |
|
423 | 461 | }, |
424 | 462 | { |
425 | 463 | "cell_type": "code", |
426 | | - "execution_count": 9, |
| 464 | + "execution_count": 7, |
427 | 465 | "id": "8857b4f6-ec55-4c24-8adc-1645eca3c1c1", |
428 | | - "metadata": {}, |
| 466 | + "metadata": { |
| 467 | + "execution": { |
| 468 | + "iopub.execute_input": "2026-03-02T16:52:57.982595Z", |
| 469 | + "iopub.status.busy": "2026-03-02T16:52:57.982516Z", |
| 470 | + "iopub.status.idle": "2026-03-02T16:52:57.988313Z", |
| 471 | + "shell.execute_reply": "2026-03-02T16:52:57.987390Z", |
| 472 | + "shell.execute_reply.started": "2026-03-02T16:52:57.982588Z" |
| 473 | + } |
| 474 | + }, |
429 | 475 | "outputs": [ |
430 | 476 | { |
431 | 477 | "data": { |
|
469 | 515 | "Index: []" |
470 | 516 | ] |
471 | 517 | }, |
472 | | - "execution_count": 9, |
| 518 | + "execution_count": 7, |
473 | 519 | "metadata": {}, |
474 | 520 | "output_type": "execute_result" |
475 | 521 | } |
476 | 522 | ], |
477 | 523 | "source": [ |
478 | | - "customers[customers.duplicated()]\n" |
| 524 | + "customers[customers.duplicated()]" |
479 | 525 | ] |
480 | 526 | }, |
481 | 527 | { |
|
500 | 546 | }, |
501 | 547 | { |
502 | 548 | "cell_type": "code", |
503 | | - "execution_count": 12, |
| 549 | + "execution_count": 8, |
504 | 550 | "id": "757fa2fb-381c-4a92-b001-6ddf0ea51d7e", |
505 | | - "metadata": {}, |
| 551 | + "metadata": { |
| 552 | + "execution": { |
| 553 | + "iopub.execute_input": "2026-03-02T16:52:57.988680Z", |
| 554 | + "iopub.status.busy": "2026-03-02T16:52:57.988594Z", |
| 555 | + "iopub.status.idle": "2026-03-02T16:52:57.992716Z", |
| 556 | + "shell.execute_reply": "2026-03-02T16:52:57.991910Z", |
| 557 | + "shell.execute_reply.started": "2026-03-02T16:52:57.988673Z" |
| 558 | + } |
| 559 | + }, |
506 | 560 | "outputs": [ |
507 | 561 | { |
508 | 562 | "name": "stdout", |
|
537 | 591 | }, |
538 | 592 | { |
539 | 593 | "cell_type": "code", |
540 | | - "execution_count": 13, |
| 594 | + "execution_count": 9, |
541 | 595 | "id": "950221bf-7983-4bc7-bb80-8f250b6903fe", |
542 | 596 | "metadata": { |
| 597 | + "execution": { |
| 598 | + "iopub.execute_input": "2026-03-02T16:52:57.993220Z", |
| 599 | + "iopub.status.busy": "2026-03-02T16:52:57.993124Z", |
| 600 | + "iopub.status.idle": "2026-03-02T16:52:58.001182Z", |
| 601 | + "shell.execute_reply": "2026-03-02T16:52:58.000384Z", |
| 602 | + "shell.execute_reply.started": "2026-03-02T16:52:57.993212Z" |
| 603 | + }, |
543 | 604 | "scrolled": true |
544 | 605 | }, |
545 | 606 | "outputs": [ |
|
932 | 993 | " <td>Juan Carlos Iker Boix Ros</td>\n", |
933 | 994 | " <td>Pre phtgrapher</td>\n", |
934 | 995 | " <td>Pont, P44om4r4s 4nd Arjon4</td>\n", |
935 | | - " <td>Pasadzo de Josep Bentez Pso</td>\n", |
| 996 | + " <td>Pasadzo de Josep Bentez Pso</td>\n", |
936 | 997 | " <td>Las Palmas</td>\n", |
937 | 998 | " <td>Mia</td>\n", |
938 | 999 | " <td>srgio24@gail.co</td>\n", |
|
1354 | 1415 | "2075 Hamburg gutknechtevelyn@niemeier.com dkreusel " |
1355 | 1416 | ] |
1356 | 1417 | }, |
1357 | | - "execution_count": 13, |
| 1418 | + "execution_count": 9, |
1358 | 1419 | "metadata": {}, |
1359 | 1420 | "output_type": "execute_result" |
1360 | 1421 | } |
1361 | 1422 | ], |
1362 | 1423 | "source": [ |
1363 | | - "customers[customers.duplicated([\"user_name\"])]\n" |
| 1424 | + "customers[customers.duplicated([\"user_name\"])]" |
1364 | 1425 | ] |
1365 | 1426 | }, |
1366 | 1427 | { |
|
1373 | 1434 | }, |
1374 | 1435 | { |
1375 | 1436 | "cell_type": "code", |
1376 | | - "execution_count": 15, |
| 1437 | + "execution_count": 10, |
1377 | 1438 | "id": "46e76915-de2b-4227-aba6-0d53c43b651b", |
1378 | | - "metadata": {}, |
| 1439 | + "metadata": { |
| 1440 | + "execution": { |
| 1441 | + "iopub.execute_input": "2026-03-02T16:52:58.001995Z", |
| 1442 | + "iopub.status.busy": "2026-03-02T16:52:58.001905Z", |
| 1443 | + "iopub.status.idle": "2026-03-02T16:52:58.006510Z", |
| 1444 | + "shell.execute_reply": "2026-03-02T16:52:58.006029Z", |
| 1445 | + "shell.execute_reply.started": "2026-03-02T16:52:58.001987Z" |
| 1446 | + } |
| 1447 | + }, |
1379 | 1448 | "outputs": [ |
1380 | 1449 | { |
1381 | 1450 | "data": { |
|
1449 | 1518 | "337 christinefinke " |
1450 | 1519 | ] |
1451 | 1520 | }, |
1452 | | - "execution_count": 15, |
| 1521 | + "execution_count": 10, |
1453 | 1522 | "metadata": {}, |
1454 | 1523 | "output_type": "execute_result" |
1455 | 1524 | } |
|
1468 | 1537 | }, |
1469 | 1538 | { |
1470 | 1539 | "cell_type": "code", |
1471 | | - "execution_count": 17, |
| 1540 | + "execution_count": 11, |
1472 | 1541 | "id": "fe3c078d-e6cc-403d-9443-9415a798327d", |
1473 | | - "metadata": {}, |
| 1542 | + "metadata": { |
| 1543 | + "execution": { |
| 1544 | + "iopub.execute_input": "2026-03-02T16:52:58.006783Z", |
| 1545 | + "iopub.status.busy": "2026-03-02T16:52:58.006713Z", |
| 1546 | + "iopub.status.idle": "2026-03-02T16:52:58.009900Z", |
| 1547 | + "shell.execute_reply": "2026-03-02T16:52:58.009447Z", |
| 1548 | + "shell.execute_reply.started": "2026-03-02T16:52:58.006776Z" |
| 1549 | + } |
| 1550 | + }, |
1474 | 1551 | "outputs": [ |
1475 | 1552 | { |
1476 | 1553 | "name": "stdout", |
|
1529 | 1606 | "\n", |
1530 | 1607 | "https://www.python4data.science/de/latest/clean-prep/deduplicate.html#3.-Dedupe" |
1531 | 1608 | ] |
1532 | | - }, |
1533 | | - { |
1534 | | - "cell_type": "code", |
1535 | | - "execution_count": null, |
1536 | | - "id": "41592878-3dac-4a9b-9929-1bd5c11f2de9", |
1537 | | - "metadata": {}, |
1538 | | - "outputs": [], |
1539 | | - "source": [] |
1540 | 1609 | } |
1541 | 1610 | ], |
1542 | 1611 | "metadata": { |
|
1555 | 1624 | "name": "python", |
1556 | 1625 | "nbconvert_exporter": "python", |
1557 | 1626 | "pygments_lexer": "ipython3", |
1558 | | - "version": "3.12.9" |
| 1627 | + "version": "3.13.0" |
| 1628 | + }, |
| 1629 | + "widgets": { |
| 1630 | + "application/vnd.jupyter.widget-state+json": { |
| 1631 | + "state": {}, |
| 1632 | + "version_major": 2, |
| 1633 | + "version_minor": 0 |
| 1634 | + } |
1559 | 1635 | } |
1560 | 1636 | }, |
1561 | 1637 | "nbformat": 4, |
|
0 commit comments