In [1]:
import agate

#table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv')
table = agate.Table.from_csv('ks_1033_data.csv')


print(table)
|-------------------------------+---------------|
|  column_names                 | column_types  |
|-------------------------------+---------------|
|  state                        | Text          |
|  county                       | Text          |
|  fips                         | Number        |
|  nsn                          | Text          |
|  item_name                    | Text          |
|  quantity                     | Number        |
|  ui                           | Text          |
|  acquisition_cost             | Number        |
|  total_cost                   | Number        |
|  ship_date                    | DateTime      |
|  federal_supply_category      | Number        |
|  federal_supply_category_name | Text          |
|  federal_supply_class         | Number        |
|  federal_supply_class_name    | Text          |
|-------------------------------+---------------|

Question 1: What was the total cost to Kansas City area counties?

To answer this question, we first must filter the table to only those rows which refer to a Kansas City area county.

In [2]:
kansas_city = table.where(lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE'))

print(len(table.rows))
print(len(kansas_city.rows))
1575
15

We can then print the Sum of the costs of all those rows. (The cost column is named total_cost.)

In [3]:
print('$%d' % kansas_city.columns['total_cost'].aggregate(agate.Sum()))
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-3-2f0757b12ce8> in <module>()
----> 1 print('$%d' % kansas_city.columns['total_cost'].aggregate(agate.Sum()))

AttributeError: 'Column' object has no attribute 'aggregate'

Question 2: Which counties spent the most?

This question is more complicated. First we group the data by county, which gives us a TableSet named counties. A TableSet is a group of tables with the same columns.

In [4]:
# Group by county
counties = table.group_by('county')

print(counties.keys())
(u'ALLEN', u'ANDERSON', u'BARTON', u'BOURBON', u'BROWN', u'BUTLER', u'CHAUTAUQUA', u'CHEROKEE', u'CHEYENNE', u'CLAY', u'COFFEY', u'COMANCHE', u'COWLEY', u'CRAWFORD', u'DECATUR', u'DICKINSON', u'DOUGLAS', u'ELLIS', u'ELLSWORTH', u'FINNEY', u'FORD', u'FRANKLIN', u'GEARY', u'PRATT', u'GRAHAM', u'GRANT', u'GRAY', u'GREELEY', u'GREENWOOD', u'HAMILTON', u'HARVEY', u'HASKELL', u'JACKSON', u'JEFFERSON', u'JOHNSON', u'KIOWA', u'LABETTE', u'LEAVENWORTH', u'LINN', u'LOGAN', u'LYON', u'MARION', u'MARSHALL', u'MCPHERSON', u'MEADE', u'MIAMI', u'MONTGOMERY', u'NEMAHA', u'NEOSHO', u'NORTON', u'OSAGE', u'OTTAWA', u'PAWNEE', u'POTTAWATOMIE', u'RAWLINS', u'RENO', u'RICE', u'RILEY', u'ROOKS', u'SALINE', u'SCOTT', u'SEDGWICK', u'SHAWNEE', u'SHERMAN', u'SMITH', u'STAFFORD', u'SUMNER', u'THOMAS', u'WABAUNSEE', u'WICHITA', u'WILSON', u'WOODSON', u'WYANDOTTE')

We then use the aggregate function to sum the total_cost column for each table in the group. The resulting values are collapsed into a new table, totals, which has a row for each county and a column named total_cost_sum containing the new total.

In [5]:
# Aggregate totals for all counties
totals = counties.aggregate([
    ('total_cost', agate.Sum(), 'total_cost_sum')
])

print(totals.column_names)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-2200fdc8f271> in <module>()
      1 # Aggregate totals for all counties
      2 totals = counties.aggregate([
----> 3     ('total_cost', agate.Sum(), 'total_cost_sum')
      4 ])
      5 

TypeError: __init__() takes exactly 2 arguments (1 given)

Finally, we sort the counties by their total cost, limit the results to the top 10 and then print the results as a text bar chart.

In [6]:
totals.order_by('total_cost_sum', reverse=True).limit(20).print_bars('county', 'total_cost_sum', width=100)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-ba800fb09e95> in <module>()
----> 1 totals.order_by('total_cost_sum', reverse=True).limit(20).print_bars('county', 'total_cost_sum', width=100)

NameError: name 'totals' is not defined
In [ ]:
 
In [7]:
import agate

purchases = agate.Table.from_csv('ks_1033_data.csv')

by_county = purchases.group_by('county')

totals = by_county.aggregate([
    ('county_cost', agate.Sum('total_cost'))
])

totals = totals.order_by('county_cost', reverse=True)
totals.limit(10).print_bars('county', 'county_cost', width=80)
# https://source.opennews.org/en-US/articles/introducing-agate/
county     county_cost
SEDGWICK    977,174.45 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 
COFFEY      691,749.03 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                 
MONTGOMERY  447,581.20 ▓░░░░░░░░░░░░░░░░░░░░░░░░░                               
JOHNSON     420,628.00 ▓░░░░░░░░░░░░░░░░░░░░░░░░                                
SALINE      245,450.24 ▓░░░░░░░░░░░░░░                                          
FINNEY      171,862.20 ▓░░░░░░░░░░                                              
BROWN       145,254.96 ▓░░░░░░░░                                                
KIOWA        97,974.00 ▓░░░░░                                                   
WILSON       74,747.10 ▓░░░░                                                    
FORD         70,780.00 ▓░░░░                                                    
                       +-------------+-------------+-------------+-------------+
                       0          250,000       500,000       750,000  1,000,000
In [ ]: