import sqlalchemy as db
import pandas as pd
import numpy as np
import pymysql

engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()

actor = db.Table('actor', metadata, autoload=True, autoload_with=engine)

# Print the column names
print(actor.columns.keys())

['actor_id', 'first_name', 'last_name', 'last_update']

# Print full table metadata
print(repr(metadata.tables['actor']))

Table('actor', MetaData(bind=None), Column('actor_id', SMALLINT(display_width=5, unsigned=True), table=<actor>, primary_key=True, nullable=False), Column('first_name', VARCHAR(length=45), table=<actor>, nullable=False), Column('last_name', VARCHAR(length=45), table=<actor>, nullable=False), Column('last_update', TIMESTAMP(), table=<actor>, nullable=False, server_default=DefaultClause(<sqlalchemy.sql.elements.TextClause object at 0x000000000F100550>, for_update=False)), schema=None)

Querying¶

Table and MetaData have already been imported. The metadata is available as metadata.

#Equivalent to 'SELECT * FROM census'
query = db.select([actor])
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()

# Limit 3
ResultSet[:3]

[(1, 'PENELOPE', 'GUINESS', datetime.datetime(2006, 2, 15, 4, 34, 33)),
 (2, 'NICK', 'WAHLBERG', datetime.datetime(2006, 2, 15, 4, 34, 33)),
 (3, 'ED', 'CHASE', datetime.datetime(2006, 2, 15, 4, 34, 33))]

ResultProxy: The object returned by the .execute() method. It can be used in a variety of ways to get the data returned by the query.

ResultSet: The actual data asked for in the query when using a fetch method such as .fetchall() on a ResultProxy.

Dealing with Large ResultSet¶

We use .fetchmany() to load optimal no of rows and overcome memory issues in case of large datasets

while flag:
    partial_results = ResultProxy.fetchmany(50)
    if(partial_results == []): 
	flag = False
    //
	code
   //
ResultProxy.close()

#Convert to dataframe
df = pd.DataFrame(ResultSet)
df.columns = ResultSet[0].keys()

Filtering data¶

Lets see some examples of raw SQLite Queries and queries using SQLAlchemy.

where

SQL :

SELECT * FROM city WHERE country_id = 6

#SQLAlchemy :

engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
city = db.Table('city', metadata, autoload=True, autoload_with=engine)
query = db.select([city]).where(city.columns.country_id == 6)
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet

[(20, 'Almirante Brown', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (43, 'Avellaneda', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (45, 'Baha Blanca', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (128, 'Crdoba', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (161, 'Escobar', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (165, 'Ezeiza', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (289, 'La Plata', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (334, 'Merlo', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (424, 'Quilmes', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (454, 'San Miguel de Tucumn', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (457, 'Santa F', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (524, 'Tandil', 6, datetime.datetime(2006, 2, 15, 4, 45, 25)),
 (567, 'Vicente Lpez', 6, datetime.datetime(2006, 2, 15, 4, 45, 25))]

in

SQL :

SELECT name, city FROM customer_list WHERE country IN (Argentina, Austria)

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
customer_list = db.Table('customer_list', metadata, autoload=True, autoload_with=engine)

query = db.select([customer_list.columns.name, customer_list.columns.city]).where(customer_list.columns.country.in_(['Argentina', 'Austria']))
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet

[('WILLIE MARKHAM', 'Almirante Brown'),
 ('JORDAN ARCHULETA', 'Avellaneda'),
 ('JASON MORRISSEY', 'Baha Blanca'),
 ('KIMBERLY LEE', 'Crdoba'),
 ('MICHEAL FORMAN', 'Escobar'),
 ('DARRYL ASHCRAFT', 'Ezeiza'),
 ('JULIA FLORES', 'La Plata'),
 ('FLORENCE WOODS', 'Merlo'),
 ('PERRY SWAFFORD', 'Quilmes'),
 ('LYDIA BURKE', 'San Miguel de Tucumn'),
 ('ERIC ROBERT', 'Santa F'),
 ('LEONARD SCHOFIELD', 'Tandil'),
 ('WILLIE HOWELL', 'Vicente Lpez'),
 ('AUDREY RAY', 'Graz'),
 ('JILL HAWKINS', 'Linz'),
 ('NORA HERRERA', 'Salzburg')]

and, or, not

SQL : SELECT * FROM customer_list WHERE country = 'Argentina' AND NOT country = 'Austria'

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
customer_list = db.Table('customer_list', metadata, autoload=True, autoload_with=engine)

query = db.select([customer_list]).where(db.and_(customer_list.columns.country == 'Argentina', customer_list.columns.country != 'Austria'))
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet

[(359, 'WILLIE MARKHAM', '1623 Kingstown Drive', '91299', '296394569728', 'Almirante Brown', 'Argentina', 'active', 2),
 (560, 'JORDAN ARCHULETA', '1229 Varanasi (Benares) Manor', '40195', '817740355461', 'Avellaneda', 'Argentina', 'active', 1),
 (322, 'JASON MORRISSEY', '1427 A Corua (La Corua) Place', '85799', '972574862516', 'Baha Blanca', 'Argentina', 'active', 1),
 (24, 'KIMBERLY LEE', '96 Tafuna Way', '99865', '934730187245', 'Crdoba', 'Argentina', 'active', 2),
 (445, 'MICHEAL FORMAN', '203 Tambaram Street', '73942', '411549550611', 'Escobar', 'Argentina', 'active', 1),
 (530, 'DARRYL ASHCRAFT', '166 Jinchang Street', '86760', '717566026669', 'Ezeiza', 'Argentina', 'active', 2),
 (89, 'JULIA FLORES', '1926 El Alto Avenue', '75543', '846225459260', 'La Plata', 'Argentina', 'active', 1),
 (107, 'FLORENCE WOODS', '1532 Dzerzinsk Way', '9599', '330838016880', 'Merlo', 'Argentina', 'active', 1),
 (585, 'PERRY SWAFFORD', '773 Dallas Manor', '12664', '914466027044', 'Quilmes', 'Argentina', 'active', 1),
 (243, 'LYDIA BURKE', '1483 Pathankot Street', '37288', '686015532180', 'San Miguel de Tucumn', 'Argentina', 'active', 1),
 (331, 'ERIC ROBERT', '430 Kumbakonam Drive', '28814', '105470691550', 'Santa F', 'Argentina', 'active', 1),
 (405, 'LEONARD SCHOFIELD', '88 Nagaon Manor', '86868', '779461480495', 'Tandil', 'Argentina', 'active', 1),
 (219, 'WILLIE HOWELL', '1244 Allappuzha (Alleppey) Place', '20657', '991802825778', 'Vicente Lpez', 'Argentina', 'active', 2)]

order by

SQL : SELECT * FROM payment ORDER BY amount DESC, amount LIMIT 3

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)

query = db.select([payment]).order_by(db.desc(payment.columns.amount), payment.columns.amount)

ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet[:3]

[(8272, 305, 1, 2166, Decimal('11.99'), datetime.datetime(2005, 6, 17, 23, 51, 21), datetime.datetime(2006, 2, 15, 22, 15, 47)),
 (9803, 362, 1, 14759, Decimal('11.99'), datetime.datetime(2005, 8, 21, 23, 28, 58), datetime.datetime(2006, 2, 15, 22, 16, 57)),
 (15821, 591, 2, 4383, Decimal('11.99'), datetime.datetime(2005, 7, 7, 20, 45, 51), datetime.datetime(2006, 2, 15, 22, 23, 18))]

functions

SQL : SELECT SUM(amount) FROM payment

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)

query = db.select([db.func.sum(payment.columns.amount)])

ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet

[(Decimal('67416.51'),)]

other functions include avg, count, min, max…

group by

SQL :

SELECT SUM(pop2008) as pop2008, sex FROM census

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)

query = db.select([db.func.sum(payment.columns.amount).label('Total Amount'), payment.columns.amount]).group_by(payment.columns.amount)

ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet

[(Decimal('0.00'), Decimal('0.00')),
 (Decimal('2949.21'), Decimal('0.99')),
 (Decimal('1.98'), Decimal('1.98')),
 (Decimal('1273.60'), Decimal('1.99')),
 (Decimal('10590.58'), Decimal('2.99')),
 (Decimal('31.84'), Decimal('3.98')),
 (Decimal('4424.91'), Decimal('3.99')),
 (Decimal('18907.11'), Decimal('4.99')),
 (Decimal('41.86'), Decimal('5.98')),
 (Decimal('7781.01'), Decimal('5.99')),
 (Decimal('7821.81'), Decimal('6.99')),
 (Decimal('39.90'), Decimal('7.98')),
 (Decimal('5353.30'), Decimal('7.99')),
 (Decimal('8.97'), Decimal('8.97')),
 (Decimal('4360.15'), Decimal('8.99')),
 (Decimal('9.98'), Decimal('9.98')),
 (Decimal('2557.44'), Decimal('9.99')),
 (Decimal('1142.96'), Decimal('10.99')),
 (Decimal('119.90'), Decimal('11.99'))]

SQLAlchemy :¶

db.select([db.func.sum(census.columns.pop2008).label('pop2008'), census.columns.sex]).group_by(census.columns.sex) distinct

SQL : SELECT DISTINCT state FROM census

SQLAlchemy :¶

db.select([census.columns.state.distinct()]) case & cast

The case() expression accepts a list of conditions to match and the column to return if the condition matches, followed by an else_ if none of the conditions match.

cast() function to convert an expression to a particular type

example

femalepop = db.func.sum(db.case([(census.columns.sex == 'F', census.columns.pop2000)],else=0)) total_pop = db.cast(db.func.sum(census.columns.pop2000), db.Float) query = db.select([female_pop/total_pop * 100]) result = connection.execute(query).scalar() print(result)

joins

If you have two tables that already have an established relationship, you can automatically use that relationship by just adding the columns we want from each table to the select statement.

select([census.columns.pop2008, state_fact.columns.abbreviation])

example

census = db.Table('census', metadata, autoload=True, autoload_with=engine) state_fact = db.Table('state_fact', metadata, autoload=True, autoload_with=engine)

Automatic Join

query = db.select([census.columns.pop2008, state_fact.columns.abbreviation]) result = connection.execute(query).fetchall() df = pd.DataFrame(results) df.columns = results[0].keys() df.head(5)

Manual Join query = db.select([census, state_fact]) query = query.select_from(census.join(state_fact, census.columns.state == state_fact.columns.name)) results = connection.execute(query).fetchall() df = pd.DataFrame(results) df.columns = results[0].keys() df.head(5)

Creating and Inserting Data into Tables By passing the database which is not present, to the engine then sqlalchemy automatically creates a new database.

The case() expression accepts a list of conditions to match and the column to return if the condition matches, followed by an else_ if none of the conditions match.

cast() function to convert an expression to a particular type

example

Creating Database and Table¶

#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()

emp = db.Table('emp', metadata,
              db.Column('Id', db.Integer()),
              db.Column('name', db.String(255), nullable=False),
              db.Column('salary', db.Float(), default=100.0),
              db.Column('active', db.Boolean(), default=True)
              )

metadata.create_all(engine) #Creates the table

Inserting Data¶

#Inserting record one by one
query = db.insert(emp).values(Id=1, name='naveen', salary=60000.00, active=True) 
ResultProxy = connection.execute(query)

#Inserting many records at ones
query = db.insert(emp) 
values_list = [{'Id':'2', 'name':'ram', 'salary':80000, 'active':False},
               {'Id':'3', 'name':'ramesh', 'salary':70000, 'active':True}]
ResultProxy = connection.execute(query,values_list)

results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)

Updating data in Databases¶

# Build a statement to update the salary to 100000
query = db.update(emp).values(salary = 100000)
query = query.where(emp.columns.Id == 1)
results = connection.execute(query)

results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)

Delete Records¶

# Build a statement to delete where salary < 100000
query = db.delete(emp)
query = query.where(emp.columns.salary < 100000)
results = connection.execute(query)

results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)

Dropping a Table¶

engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
emp = db.Table('emp', metadata, autoload=True, autoload_with=engine)

print(emp.columns.keys())

#emp.drop(engine) #drops a single table

['Id', 'name', 'salary', 'active']

emp.drop(engine) #drops a single table

table_name.drop(engine) #drops a single table
metadata.drop_all(engine) #drops all the tables in the database

HOA QUACH

HOA QUACH

April 21, 2019

Querying¶

Dealing with Large ResultSet¶

Filtering data¶

SQLAlchemy :¶

SQLAlchemy :¶

Creating Database and Table¶

Inserting Data¶

Updating data in Databases¶

Delete Records¶

Dropping a Table¶

	Id	name	salary	active
0	1	naveen	60000.0	True
1	2	ram	80000.0	False
2	3	ramesh	70000.0	True

	Id	name	salary	active
0	1	naveen	100000.0	True
1	2	ram	80000.0	False
2	3	ramesh	70000.0	True