-
Notifications
You must be signed in to change notification settings - Fork 0
/
County Competition Review
131 lines (111 loc) · 6.56 KB
/
County Competition Review
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Dependencies:
# - pandas: For data manipulation and analysis
# - openpyxl: For creating and editing Excel files with pandas
# Instructions for installing dependencies:
# 1. Install pandas:
# pip install pandas
#
# 2. Install openpyxl:
# pip install openpyxl
import pandas as pd
import os
# Define the path to the directory containing county CSV files on the network drive
directory = r'\\fs\Engineering\Cost Quest Raw Data\V7\Merged CSV Files for Import\county_data'
# Define the path to the analysis directory
analysis_directory = os.path.join(directory, 'analysis')
# FIPS to County mapping
fips_to_county = {
'18001': 'Adams', '18003': 'Allen', '18005': 'Bartholomew', '18007': 'Benton', '18009': 'Blackford',
'18011': 'Boone', '18013': 'Brown', '18015': 'Carroll', '18017': 'Cass', '18019': 'Clark',
'18021': 'Clay', '18023': 'Clinton', '18025': 'Crawford', '18027': 'Daviess', '18029': 'Dearborn',
'18031': 'Decatur', '18033': 'De Kalb', '18035': 'Delaware', '18037': 'Dubois', '18039': 'Elkhart',
'18041': 'Fayette', '18043': 'Floyd', '18045': 'Fountain', '18047': 'Franklin', '18049': 'Fulton',
'18051': 'Gibson', '18053': 'Grant', '18055': 'Greene', '18057': 'Hamilton', '18059': 'Hancock',
'18061': 'Harrison', '18063': 'Hendricks', '18065': 'Henry', '18067': 'Howard', '18069': 'Huntington',
'18071': 'Jackson', '18073': 'Jasper', '18075': 'Jay', '18077': 'Jefferson', '18079': 'Jennings',
'18081': 'Johnson', '18083': 'Knox', '18085': 'Kosciusko', '18087': 'Lagrange', '18089': 'Lake',
'18091': 'La Porte', '18093': 'Lawrence', '18095': 'Madison', '18097': 'Marion', '18099': 'Marshall',
'18101': 'Martin', '18103': 'Miami', '18105': 'Monroe', '18107': 'Montgomery', '18109': 'Morgan',
'18111': 'Newton', '18113': 'Noble', '18115': 'Ohio', '18117': 'Orange', '18119': 'Owen',
'18121': 'Parke', '18123': 'Perry', '18125': 'Pike', '18127': 'Porter', '18129': 'Posey',
'18131': 'Pulaski', '18133': 'Putnam', '18135': 'Randolph', '18137': 'Ripley', '18139': 'Rush',
'18141': 'St. Joseph', '18143': 'Scott', '18145': 'Shelby', '18147': 'Spencer', '18149': 'Starke',
'18151': 'Steuben', '18153': 'Sullivan', '18155': 'Switzerland', '18157': 'Tippecanoe', '18159': 'Tipton',
'18161': 'Union', '18163': 'Vanderburgh', '18165': 'Vermillion', '18167': 'Vigo', '18169': 'Wabash',
'18171': 'Warren', '18173': 'Warrick', '18175': 'Washington', '18177': 'Wayne', '18179': 'Wells',
'18181': 'White', '18183': 'Whitley'
}
# Define the counties to process first
initial_counties = ['18103', '18061', '18153', '18139', '18021', '18015', '18117', '18171', '18073', '18051']
# Function to process a single file and return the results as a DataFrame
def process_county_file(filepath, county_name):
# Load the data for the specified county with specified dtype for problematic columns
data = pd.read_csv(filepath, dtype={'BDC_SatelliteProviders': str}, low_memory=False)
# Perform the analysis for the specified county
total_passings = data['Location_id'].nunique()
# Define a function to calculate passings and market share for a provider type
def calculate_market_share(provider_column):
provider_counts = {}
for entry in data[provider_column].dropna():
providers = entry.split('|')
for provider in providers:
provider = provider.strip()
if provider in provider_counts:
provider_counts[provider] += 1
else:
provider_counts[provider] = 1
provider_details = []
for provider, count in provider_counts.items():
if count >= 10:
market_share = (count / total_passings) * 100
if market_share >= 1:
provider_details.append((provider, count, market_share))
return provider_details
# Get the details for each provider type
fiber_providers = calculate_market_share('BDC_FiberProviders')
cable_providers = calculate_market_share('BDC_CableProviders')
fw_unlicensed_providers = calculate_market_share('BDC_FWUnlicensedProviders')
telco_providers = calculate_market_share('BDC_TelcoProviders')
# Store the results in a structured format
results = []
results.append(('Total Passings', total_passings, ''))
results.append(('Fiber Providers', 'Passings', 'Market Share (%)'))
for provider, passings, market_share in fiber_providers:
results.append((provider, passings, f"{market_share:.2f}%"))
results.append(('Cable Providers', 'Passings', 'Market Share (%)'))
for provider, passings, market_share in cable_providers:
results.append((provider, passings, f"{market_share:.2f}%"))
results.append(('Fixed Wireless Providers', 'Passings', 'Market Share (%)'))
for provider, passings, market_share in fw_unlicensed_providers:
results.append((provider, passings, f"{market_share:.2f}%"))
results.append(('Telco Providers', 'Passings', 'Market Share (%)'))
for provider, passings, market_share in telco_providers:
results.append((provider, passings, f"{market_share:.2f}%"))
# Convert the results list to a DataFrame
report_df = pd.DataFrame(results, columns=['Provider', 'Passings', 'Percent of Market'])
return report_df
# Create an Excel writer object
workbook_file = os.path.join(analysis_directory, 'county_detailed_competition_report.xlsx')
with pd.ExcelWriter(workbook_file, engine='openpyxl') as writer:
# Process the initial counties first
for fips in initial_counties:
county_name = fips_to_county[fips]
filename = f'county_{fips}.csv'
filepath = os.path.join(directory, filename)
if os.path.exists(filepath):
print(f"Processing file: {filename} for county: {county_name}")
county_df = process_county_file(filepath, county_name)
# Add the DataFrame to the workbook
county_df.to_excel(writer, sheet_name=county_name, index=False)
# Process all other counties
for fips, county_name in fips_to_county.items():
if fips not in initial_counties:
filename = f'county_{fips}.csv'
filepath = os.path.join(directory, filename)
if os.path.exists(filepath):
print(f"Processing file: {filename} for county: {county_name}")
county_df = process_county_file(filepath, county_name)
# Add the DataFrame to the workbook
county_df.to_excel(writer, sheet_name=county_name, index=False)
print(f"Workbook saved to {workbook_file}")
print("Processing complete.")