data:image/s3,"s3://crabby-images/2f8ae/2f8aefb5bb9e38b787a15f8afd390ce4e0d95c78" alt="portfolio-post"
import sqlalchemy as db
import pandas as pd
import numpy as np
import pymysql
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
actor = db.Table('actor', metadata, autoload=True, autoload_with=engine)
# Print the column names
print(actor.columns.keys())
# Print full table metadata
print(repr(metadata.tables['actor']))
Querying¶
Table and MetaData have already been imported. The metadata is available as metadata.
#Equivalent to 'SELECT * FROM census'
query = db.select([actor])
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
# Limit 3
ResultSet[:3]
ResultProxy: The object returned by the .execute() method. It can be used in a variety of ways to get the data returned by the query.
ResultSet: The actual data asked for in the query when using a fetch method such as .fetchall() on a ResultProxy.
Dealing with Large ResultSet¶
We use .fetchmany() to load optimal no of rows and overcome memory issues in case of large datasets
while flag:
partial_results = ResultProxy.fetchmany(50)
if(partial_results == []):
flag = False
//
code
//
ResultProxy.close()
#Convert to dataframe
df = pd.DataFrame(ResultSet)
df.columns = ResultSet[0].keys()
Filtering data¶
Lets see some examples of raw SQLite Queries and queries using SQLAlchemy.
where
SQL :
SELECT * FROM city WHERE country_id = 6
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
city = db.Table('city', metadata, autoload=True, autoload_with=engine)
query = db.select([city]).where(city.columns.country_id == 6)
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet
in
SQL :
SELECT name, city FROM customer_list WHERE country IN (Argentina, Austria)
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
customer_list = db.Table('customer_list', metadata, autoload=True, autoload_with=engine)
query = db.select([customer_list.columns.name, customer_list.columns.city]).where(customer_list.columns.country.in_(['Argentina', 'Austria']))
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet
and, or, not
SQL : SELECT * FROM customer_list WHERE country = 'Argentina' AND NOT country = 'Austria'
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
customer_list = db.Table('customer_list', metadata, autoload=True, autoload_with=engine)
query = db.select([customer_list]).where(db.and_(customer_list.columns.country == 'Argentina', customer_list.columns.country != 'Austria'))
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet
order by
SQL : SELECT * FROM payment ORDER BY amount DESC, amount LIMIT 3
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)
query = db.select([payment]).order_by(db.desc(payment.columns.amount), payment.columns.amount)
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet[:3]
functions
SQL : SELECT SUM(amount) FROM payment
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)
query = db.select([db.func.sum(payment.columns.amount)])
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet
other functions include avg, count, min, max…
group by
SQL :
SELECT SUM(pop2008) as pop2008, sex FROM census
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
payment = db.Table('payment', metadata, autoload=True, autoload_with=engine)
query = db.select([db.func.sum(payment.columns.amount).label('Total Amount'), payment.columns.amount]).group_by(payment.columns.amount)
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultSet
The case() expression accepts a list of conditions to match and the column to return if the condition matches, followed by an else_ if none of the conditions match.
cast() function to convert an expression to a particular type
example
femalepop = db.func.sum(db.case([(census.columns.sex == 'F', census.columns.pop2000)],else=0)) total_pop = db.cast(db.func.sum(census.columns.pop2000), db.Float) query = db.select([female_pop/total_pop * 100]) result = connection.execute(query).scalar() print(result)
joins
If you have two tables that already have an established relationship, you can automatically use that relationship by just adding the columns we want from each table to the select statement.
select([census.columns.pop2008, state_fact.columns.abbreviation])
example
census = db.Table('census', metadata, autoload=True, autoload_with=engine) state_fact = db.Table('state_fact', metadata, autoload=True, autoload_with=engine)
Automatic Join
query = db.select([census.columns.pop2008, state_fact.columns.abbreviation]) result = connection.execute(query).fetchall() df = pd.DataFrame(results) df.columns = results[0].keys() df.head(5)
Manual Join query = db.select([census, state_fact]) query = query.select_from(census.join(state_fact, census.columns.state == state_fact.columns.name)) results = connection.execute(query).fetchall() df = pd.DataFrame(results) df.columns = results[0].keys() df.head(5)
Creating and Inserting Data into Tables By passing the database which is not present, to the engine then sqlalchemy automatically creates a new database.
The case() expression accepts a list of conditions to match and the column to return if the condition matches, followed by an else_ if none of the conditions match.
cast() function to convert an expression to a particular type
example
Creating Database and Table¶
#SQLAlchemy :
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
emp = db.Table('emp', metadata,
db.Column('Id', db.Integer()),
db.Column('name', db.String(255), nullable=False),
db.Column('salary', db.Float(), default=100.0),
db.Column('active', db.Boolean(), default=True)
)
metadata.create_all(engine) #Creates the table
Inserting Data¶
#Inserting record one by one
query = db.insert(emp).values(Id=1, name='naveen', salary=60000.00, active=True)
ResultProxy = connection.execute(query)
#Inserting many records at ones
query = db.insert(emp)
values_list = [{'Id':'2', 'name':'ram', 'salary':80000, 'active':False},
{'Id':'3', 'name':'ramesh', 'salary':70000, 'active':True}]
ResultProxy = connection.execute(query,values_list)
results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)
Updating data in Databases¶
# Build a statement to update the salary to 100000
query = db.update(emp).values(salary = 100000)
query = query.where(emp.columns.Id == 1)
results = connection.execute(query)
results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)
Delete Records¶
# Build a statement to delete where salary < 100000
query = db.delete(emp)
query = query.where(emp.columns.salary < 100000)
results = connection.execute(query)
results = connection.execute(db.select([emp])).fetchall()
df = pd.DataFrame(results)
df.columns = results[0].keys()
df.head(4)
Dropping a Table¶
engine = db.create_engine("mysql+pymysql://root:password@localhost/sakila?host=localhost?port=3306")
connection = engine.connect()
metadata = db.MetaData()
emp = db.Table('emp', metadata, autoload=True, autoload_with=engine)
print(emp.columns.keys())
#emp.drop(engine) #drops a single table
emp.drop(engine) #drops a single table
table_name.drop(engine) #drops a single table
metadata.drop_all(engine) #drops all the tables in the database