Q&A 3 How do you inspect the GDP data you just loaded?
3.1 Explanation
After loading data into MariaDB, the first step is to validate that the rows are there, and the structure matches what we expect (country, year, gdp_usd_current). This prevents silent errors (empty tables, wrong encodings, wrong column types, etc.). Do three quick checks: row count, sample rows, and distinct values (e.g., countries). Optionally, check year ranges and missing values.
3.2 SQL/MySQL Code
-- 1) How many rows were loaded?
SELECT COUNT(*) AS total_rows
FROM gdp_wdi;
-- 2) Do the columns look right?
DESCRIBE gdp_wdi;
-- 3) Peek at a few rows
SELECT *
FROM gdp_wdi
LIMIT 10;
-- 4) Which countries do we have?
SELECT DISTINCT country
FROM gdp_wdi
ORDER BY country;
-- 5) Year coverage per country (min/max)
SELECT country, MIN(year) AS min_year, MAX(year) AS max_year, COUNT(*) AS n_years
FROM gdp_wdi
GROUP BY country
ORDER BY country;
-- 6) Any missing GDP values?
SELECT COUNT(*) AS n_missing
FROM gdp_wdi
WHERE gdp_usd_current IS NULL;3.3 Python Code — robust connection (works with XAMPP on macOS)
This version tries TCP (127.0.0.1) first, then falls back to the XAMPP Unix socket path on macOS.
It also shows a SQLAlchemy option, which is often the most reliable withpandas.
3.4 Install (once)
# Quick validation with pandas + PyMySQL/SQLAlchemy (robust)
import os
import pandas as pd
# --- OPTION A: SQLAlchemy engine (recommended) ---
# Tip: use 127.0.0.1 (not "localhost") to force TCP on macOS.
def using_sqlalchemy():
from sqlalchemy import create_engine, text
user = os.getenv("DB_USER", "root")
pwd = os.getenv("DB_PASS", "") # set if needed
host = os.getenv("DB_HOST", "127.0.0.1") # use 127.0.0.1 on macOS
port = int(os.getenv("DB_PORT", "3306"))
db = os.getenv("DB_NAME", "cdi_economics")
engine = create_engine(f"mysql+pymysql://{user}:{pwd}@{host}:{port}/{db}")
with engine.connect() as conn:
total_rows = pd.read_sql(text("SELECT COUNT(*) AS total_rows FROM gdp_wdi;"), conn)
print("Total rows (SQLAlchemy):", int(total_rows.iloc[0]["total_rows"]))
# --- OPTION B: PyMySQL direct connection ---
# Tries TCP first, then Unix socket (XAMPP default on macOS)
def using_pymysql():
import pymysql, pymysql.cursors
user = os.getenv("DB_USER", "root")
pwd = os.getenv("DB_PASS", "")
db = os.getenv("DB_NAME", "cdi_economics")
port = int(os.getenv("DB_PORT", "3306"))
# Try TCP connection (127.0.0.1)
try:
conn = pymysql.connect(
host="127.0.0.1",
port=port,
user=user,
password=pwd,
database=db,
autocommit=True,
cursorclass=pymysql.cursors.DictCursor,
)
df = pd.read_sql("SELECT COUNT(*) AS total_rows FROM gdp_wdi;", conn)
print("Total rows (PyMySQL TCP):", int(df.iloc[0]["total_rows"]))
conn.close()
return
except Exception as e:
print("TCP connection failed, will try Unix socket. Error:", e)
# Fallback: XAMPP Unix socket path (macOS)
# Common XAMPP path:
socket_path = "/Applications/XAMPP/xamppfiles/var/mysql/mysql.sock"
# If you use MAMP/MariaDB, adjust the path accordingly.
try:
conn = pymysql.connect(
unix_socket=socket_path,
user=user,
password=pwd,
database=db,
autocommit=True,
cursorclass=pymysql.cursors.DictCursor,
)
df = pd.read_sql("SELECT COUNT(*) AS total_rows FROM gdp_wdi;", conn)
print("Total rows (PyMySQL socket):", int(df.iloc[0]["total_rows"]))
conn.close()
except Exception as e:
print("Unix socket connection failed. Error:", e)
print("👉 Check that MySQL/MariaDB is running and the socket path/credentials are correct.")
if __name__ == "__main__":
# Prefer SQLAlchemy (more tolerant with pandas versions)
try:
using_sqlalchemy()
except Exception as e:
print("SQLAlchemy path failed, trying PyMySQL directly. Error:", e)
using_pymysql()Total rows (SQLAlchemy): 208