# Show the options accepted by `docker run`.
docker run --help
# Start an interactive container from the python:3.9 image with bash as the entrypoint.
docker run -it --entrypoint bash python:3.9
# List the installed Python packages (presumably run inside the container shell — confirm).
pip list
# Start the services defined in the Compose file.
docker-compose up
# Docker Compose stack: a Postgres 13 database plus a pgAdmin web UI.
services:
  pgdatabase:
    image: postgres:13
    environment:
      - POSTGRES_USER=root
      - POSTGRES_PASSWORD=root
      - POSTGRES_DB=ny_taxi
    volumes:
      # Bind-mount a host directory onto the Postgres data directory.
      - "./ny_taxi_postgres_data:/var/lib/postgresql/data:rw"
    ports:
      - "5432:5432"
  pgadmin:
    image: dpage/pgadmin4
    environment:
      # NOTE(review): the original address was lost to e-mail obfuscation
      # ("[email protected]"); admin@admin.com is assumed — confirm.
      - PGADMIN_DEFAULT_EMAIL=admin@admin.com
      - PGADMIN_DEFAULT_PASSWORD=root
    ports:
      # pgAdmin UI is published on host port 8080.
      - "8080:80"
# Build the ingestion image from the Dockerfile in the current directory.
docker build -t taxi_ingest:v001 .
# Run the ingestion image on the Compose network so it can reach the
# "pgdatabase" host; the flags after the image name are passed to the
# container (presumably the ingest script's own arguments — confirm).
# winpty is the Git Bash (Windows) wrapper needed for an interactive TTY.
winpty docker run -it --network=postgres_default \
  taxi_ingest:v001 \
  --user=root \
  --password=root \
  --host=pgdatabase \
  --port=5432 \
  --table_name=taxi_zones \
  --db=ny_taxi \
  --url="https://s3.amazonaws.com/nyc-tlc/misc/taxi+_zone_lookup.csv"
# Run the ingestion image against the September 2019 green taxi trip file.
# The target table is green_tripdata_201909, the name the queries later in
# this file read from (public.green_tripdata_201909).
# NOTE(review): the original passed --table_name=taxi_zones, apparently
# copied from the zone-lookup command — confirm the intended table name.
winpty docker run -it --network=postgres_default \
  taxi_ingest:v001 \
  --user=root \
  --password=root \
  --host=pgdatabase \
  --port=5432 \
  --table_name=green_tripdata_201909 \
  --db=ny_taxi \
  --url="https://github.com/DataTalksClub/nyc-tlc-data/releases/download/green/green_tripdata_2019-09.csv.gz"
-- Count the trips whose pickup and drop-off both fall on 2019-09-18.
-- COUNT(g."index") counts only rows with a non-NULL "index" value.
SELECT COUNT(g."index")
FROM public.green_tripdata_201909 AS g
WHERE DATE(g.lpep_dropoff_datetime) = '2019-09-18'
  AND DATE(g.lpep_pickup_datetime) = '2019-09-18';
-- Pickup date of every trip, ordered by trip_distance descending.
SELECT
    DATE(g.lpep_pickup_datetime)
FROM
    public.green_tripdata_201909 AS g
ORDER BY
    g.trip_distance DESC;
-- Trips picked up on 2019-09-18 in Brooklyn, Manhattan or Queens, joined to
-- the zone lookup row for the pickup location.
-- An inner join states the intent directly: the Borough filter already
-- discards any trip without a matching zone row, and the IN list already
-- excludes 'Unknown', so no separate exclusion is needed.
SELECT *
FROM public.green_tripdata_201909 AS g
INNER JOIN public.taxi_zones AS t
    ON g."PULocationID" = t."LocationID"
WHERE t."Borough" IN ('Brooklyn', 'Manhattan', 'Queens')
  AND DATE(g.lpep_pickup_datetime) = '2019-09-18';
-- Trips picked up in the Astoria zone during month 9, largest tip first.
-- The zone filter on t means trips without a matching zone row are dropped,
-- even though the join is written as a LEFT JOIN.
SELECT
    *
FROM
    public.green_tripdata_201909 AS g
    LEFT JOIN public.taxi_zones AS t
        ON g."PULocationID" = t."LocationID"
WHERE
    t."Zone" = 'Astoria'
    AND EXTRACT(MONTH FROM DATE(g.lpep_pickup_datetime)) = 9
ORDER BY
    g.tip_amount DESC;
It turns out that, for trips picked up in Astoria, the drop-off location (DOLocationID) of the trip with the largest tip is in the zone JFK Airport.