| Hosted by CoCalc | Download
# Scratch expression: evaluates the length of this string literal (the
# notebook displays the result of the last expression in a cell).
len('Create a detailed Socrata datasets catalog - Fun with SODA')
58
import os
import csv
import sys
import subprocess

# Download the GSA .gov domain registry snapshot to domains.csv.
# check_call raises CalledProcessError when wget exits non-zero, so a failed
# download is reported instead of being parsed as an empty/missing file.
# Passing an argument list also avoids going through a shell.
subprocess.check_call([
    'wget', '-O', 'domains.csv',
    'https://gsa.github.io/data/dotgov-domains/2014-12-01-full.csv',
])

# Maps lower-cased domain name (first column) -> "<second-to-last>, <last>"
# column values, i.e. "City, State" for this CSV layout.
domains = {}
with open('domains.csv', 'rt') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    for row in reader:
        # Blank or truncated lines cannot supply both trailing columns.
        if len(row) < 2:
            continue
        domains[row[0].lower()] = row[-2] + ', ' + row[-1]

# Bare expression so the notebook displays how many domains were loaded.
len(domains)
0 ['Domain Name', 'Domain Type', 'Agency', 'City', 'State'] 5320
import requests
import os
import re
import subprocess

# NOTE(review): this cell was reconstructed from a whitespace-collapsed
# source. The `else` branch is read as belonging to the `.gov` check, and
# `portals.append(d)` as applying only to portals that were geocoded (the
# later cell converts d['lat'] / d['lon'] for every entry). Confirm against
# the original worksheet.

# Fields scraped from `whois` output; compiled once, outside the portal loop.
_BILLING_CITY_RE = re.compile(r'Billing Contact City:\s*(?P<city>.*)')
_BILLING_STATE_RE = re.compile(r'Billing Contact State/Province:\s*(?P<state>.*)')


def _gov_location(domain, gov_locations):
    """Return the location registered for `domain` or its closest parent.

    Candidates are tried from the full host name down to its two-label
    suffix ('data.ny.gov', then 'ny.gov'), so only whole dotted suffixes
    match. A plain substring test would accept unrelated names, e.g.
    'ny.gov' is a substring of 'albany.gov'. Returns None if nothing matches.
    """
    labels = domain.lower().split('.')
    for start in range(len(labels) - 1):
        candidate = '.'.join(labels[start:])
        if candidate in gov_locations:
            return gov_locations[candidate]
    return None


def _whois_location(root_domain):
    """Return 'City, State' from the whois billing contact, or None.

    The domain comes from a remote API, so it is passed as an argument
    list rather than interpolated into a shell command line.
    """
    try:
        proc = subprocess.Popen(
            ['whois', root_domain],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError:
        # No usable `whois` binary: treat as "location unknown", matching
        # the best-effort behaviour of reading empty output.
        return None
    whois_text = proc.communicate()[0]

    city_match = _BILLING_CITY_RE.search(whois_text)
    state_match = _BILLING_STATE_RE.search(whois_text)
    if not (city_match and state_match):
        return None
    # Values are local to this call, so nothing leaks between portals.
    city = city_match.group('city').strip()
    state = state_match.group('state').strip()
    if city and state:
        return '%s, %s' % (city, state)
    return None


def _geocode(location):
    """Return the first Nominatim search hit for `location`, or None."""
    hits = requests.get(
        'http://nominatim.openstreetmap.org/search/',
        # `params` URL-encodes the query (spaces, commas, '&', '#', ...).
        params={'q': '%s,usa' % location, 'format': 'json'},
    ).json()
    return hits[0] if hits else None


portals = []
results = requests.get('http://api.us.socrata.com/api/catalog/v1/domains').json()['results']
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('# of results %d' % len(results))
print('pass get')
domains = dict(domains)
for portal in results:
    d = {'domain': str(portal['domain'])}
    if d['domain'].endswith('.gov'):
        location = _gov_location(d['domain'], domains)
    else:
        # Query whois for the last two labels of the host name.
        location = _whois_location('.'.join(d['domain'].split('.')[-2:]))
    if location is None:
        continue
    d['location'] = str(location)

    geo = _geocode(d['location'])
    if geo is None:
        continue
    d['lat'] = geo['lat']
    d['lon'] = geo['lon']
    portals.append(d)

# Keep a reference so later cells can rebuild `portals` without refetching.
portals_backup = portals
# of results 216 pass get
# CoCalc worksheet cell marker: ︠956fb298-c25b-4a74-b7d5-a088f10be5f3︠
#
# Render the geocoded portals as a standalone Google Maps page,
# socrata_portals_map.html, with one marker per portal domain.
import json

# Work on copies (all keys/values coerced to str) so portals_backup keeps
# the raw API values; lat/lon are then converted to numbers for the map.
portals = [
    dict((str(key), str(value)) for key, value in item.items())
    for item in portals_backup
]
for d in portals:
    d['lat'] = float(d['lat'])
    d['lon'] = float(d['lon'])

# Serialize as JSON rather than Python repr(): repr output is only valid
# JavaScript by coincidence. '</' is escaped so a value containing
# '</script>' cannot terminate the inline script block.
locations = json.dumps(portals).replace('</', '<\\/')

# NOTE(review): the Google Maps API key is hard-coded in the template below.
# The template is filled with %-formatting, so any literal percent sign added
# to it must be doubled. The JavaScript regex /\./ is written as '\\.' here,
# which produces the same runtime text without an invalid string escape.
s = """
<title>Map of Socrata Data</title>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.6/cyborg/bootstrap.min.css"/>
<style>
#map { height: 500px; }
.infowindowcontent { max-height:200px; max-width:200px; overflow:auto; }
</style>
<nav class="navbar navbar-default">
  <div class="container-fluid">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="#">DataNearMe</a>
    </div>
    <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
      <ul class="nav navbar-nav">
        <li class="active"><a href="#">Link <span class="sr-only">(current)</span></a></li>
        <li><a href="#">Link</a></li>
        <li class="dropdown">
          <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Dropdown <span class="caret"></span></a>
          <ul class="dropdown-menu" role="menu">
            <li><a href="#">Action</a></li>
            <li><a href="#">Another action</a></li>
            <li><a href="#">Something else here</a></li>
            <li class="divider"></li>
            <li><a href="#">Separated link</a></li>
            <li class="divider"></li>
            <li><a href="#">One more separated link</a></li>
          </ul>
        </li>
      </ul>
      <form class="navbar-form navbar-left" role="search">
        <div class="form-group">
          <input type="text" class="form-control" placeholder="Search">
        </div>
        <button type="submit" class="btn btn-default">Submit</button>
      </form>
      <ul class="nav navbar-nav navbar-right">
        <li><a href="#">Link</a></li>
      </ul>
    </div>
  </div>
</nav>
<div id="map"></div>
<script>
var locations = %s;
var map;
function getDistance(pointA, pointB) {
    lat1 = pointA['lat']
    lat2 = pointB['lat']
    lon1 = pointA['lon']
    lon2 = pointB['lon']
    var R = 6371000; // metres
    var φ1 = lat1.toRadians();
    var φ2 = lat2.toRadians();
    var Δφ = (lat2-lat1).toRadians();
    var Δλ = (lon2-lon1).toRadians();
    var a = Math.sin(Δφ/2) * Math.sin(Δφ/2) + Math.cos(φ1) * Math.cos(φ2) * Math.sin(Δλ/2) * Math.sin(Δλ/2);
    var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));
    var d = R * c;
    return d;
}
function geocodeAddress(geocoder, map, address) {
    //var address = document.getElementById('address').value;
    geocoder.geocode({'address': address}, function(results, status) {
        if (status === google.maps.GeocoderStatus.OK) {
            //resultsMap.setCenter(results[0].geometry.location);
            var marker = new google.maps.Marker({
                map: map,
                position: results[0].geometry.location
            });
        } else {
            alert('Geocode was not successful for the following reason: ' + status);
        }
    });
}
function showCatalogForDomain(domain, data) {
    var categoriesToViews = {};
    var html = '<strong># of datasets: '+data['results'].length;
    $.each(data['results'], function(j, value) {
        var total_page_views = value['resource']['view_count']['page_views_total'];
        //$.each
    });
    $('#content_for_'+domain.replace(/\\./g,'_')).html(html);
}
function getCatalogForDomain(domain) {
    $.get('https://api.us.socrata.com/api/catalog/v1?domains='+domain+'&only=datasets&limit=6000', function(data) {
        showCatalogForDomain(domain, data);
    });
}
function initMap() {
    var geocoder = new google.maps.Geocoder();
    map = new google.maps.Map(document.getElementById('map'), {
        center: {lat: -34.397, lng: 150.644},
        zoom: 4
    });
    map.setCenter(new google.maps.LatLng(41.850033, -87.6500523));
    infowindow = new google.maps.InfoWindow();
    for (var i=0;i<locations.length;i++) {
        var contentString = '<div id="content">'+
            '<div id="siteNotice">'+
            '</div>'+
            '<h1 id="firstHeading" class="firstHeading"><a href="http://'+locations[i]['domain']+'">'+locations[i]['domain']+'</a></h1>'+
            '<div id="content_for_'+locations[i]['domain'].replace(/\\./g,'_')+'" class="infowindowcontent">'+
            '</div>'+
            '</div>';
        var marker = new google.maps.Marker({
            map: map,
            position: {'lat': locations[i]['lat'], 'lng': locations[i]['lon']},
            title: locations[i]['domain'],
            domain: locations[i]['domain']
        });
        marker.content = contentString;
        var infoWindow = new google.maps.InfoWindow();
        google.maps.event.addListener(marker, 'click', function () {
            infoWindow.setContent(this.content);
            infoWindow.open(this.getMap(), this);
            getCatalogForDomain(this.domain);
        });
        //setTimeout(function(i) {geocodeAddress(geocoder, map, locations[i]['location'])}, i*200, i);
    }
}
</script>
<script src="https://maps.googleapis.com/maps/api/js?key=AIzaSyAA4gc2PxM5xNEwehLnUIW2MppwprXwm14&callback=initMap" async defer></script>
""" % (locations)

# Inside the worksheet the page can be shown inline instead:
#html(s, hide=False)
with open('socrata_portals_map.html', 'w') as f:
    f.write(s)