Original size 1140x1600

Анализ информации, касающейся качества интернета

The project is taking part in the competition

Качество и свобода доступа в интернет в последнее время стали очень насущной и обсуждаемой проблемой, поэтому я решила проанализировать госданные, касающиеся именно этой темы. Я нашла информацию на портале открытых данных правительства Москвы.

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, ttest_ind

# NOTE: this silences every warning for the rest of the script.
warnings.filterwarnings('ignore')

# Set a common style for all charts.
sns.set_style("whitegrid")

# Shared colour palette; the charts below pick colours from it by index.
palette = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#6A994E']
sns.set_palette(palette)

plt.rcParams['figure.figsize'] = (12, 7)
plt.rcParams['font.size'] = 11

# Replace 'your_dataset.csv' with your own file!
df = pd.read_csv('your_dataset.csv')  # <- change this line

print("\n Ищу числовые столбцы для анализа…")
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

# At least two numeric columns are needed: one target and one feature.
if len(numeric_cols) < 2:
    print(f"Найдено числовых столбцов: {len(numeric_cols)}. Нужно минимум 2 — добавь больше!")
    # SystemExit works without the `site`-provided exit() helper and
    # reports the failure through a non-zero exit status.
    raise SystemExit(1)

print(f" Найдено {len(numeric_cols)} числовых столбцов: {numeric_cols}")

# The first numeric column is the target; up to the next four are features.
target_col = numeric_cols[0]
features = numeric_cols[1:5]

print(f"\n Целевая переменная: {target_col}")
print(f" Анализирую: {features}")

# Chart 1: scatter plot of the first feature against the target,
# with a linear trend line and the Pearson correlation.
print("\n📈 Создаю График 1: Scatter Plot…")
x_col = features[0]

# pearsonr and np.polyfit do not skip missing values, so keep only rows
# where both columns are present (the scatter would not draw NaN points anyway).
pairs = df[[x_col, target_col]].dropna()
r, p = pearsonr(pairs[x_col], pairs[target_col])

plt.figure(figsize=(12, 7))
plt.scatter(pairs[x_col], pairs[target_col], alpha=0.6, s=60, color=palette[0])

# Trend line: first-degree least-squares fit, drawn over the sorted x values.
z = np.polyfit(pairs[x_col], pairs[target_col], 1)
p_line = np.poly1d(z)
x_sorted = np.sort(pairs[x_col])
plt.plot(x_sorted, p_line(x_sorted), "r--", linewidth=3,
         label=f'r={r:.3f}, p={p:.3f}')

plt.xlabel(f'{x_col}', fontsize=12, fontweight='bold')
plt.ylabel(f'{target_col}', fontsize=12, fontweight='bold')
plt.title(f'📊 График 1: {x_col} vs {target_col}\n (корреляция r={r:.3f})',
          fontsize=14, fontweight='bold', pad=20)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('01_scatter.png', dpi=300, bbox_inches='tight')
plt.close()
print(f"✅ 01_scatter.png (r={r:.3f})")

# Chart 2: histogram of the target with mean and median markers.
print("📊 Создаю График 2: Histogram…")

# Drop missing values up front: NaN breaks the automatic bin range of the
# histogram, while mean()/median() already ignore it.
target_values = df[target_col].dropna()
mean_val = target_values.mean()
median_val = target_values.median()

plt.figure(figsize=(12, 7))
plt.hist(target_values, bins=30, edgecolor='black', alpha=0.7, color=palette[1])
plt.axvline(mean_val, color='red', linestyle='--', linewidth=2.5,
            label=f'Среднее={mean_val:.2f}')
plt.axvline(median_val, color='green', linestyle='--', linewidth=2.5,
            label=f'Медиана={median_val:.2f}')

plt.xlabel(target_col)
plt.ylabel('Количество')
plt.title(f'📈 График 2: Распределение {target_col}', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig('02_histogram.png', dpi=300, bbox_inches='tight')
plt.close()
print("✅ 02_histogram.png")

# Chart 3: correlation heatmap.
print("🔥 Создаю График 3: Heatmap корреляций…")
plt.figure(figsize=(10, 8))

# Only the first six numeric columns are included.
corr_matrix = df[numeric_cols[:6]].corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm',
            center=0, square=True, cbar_kws={"shrink": 0.8})
plt.title('📊 График 3: Матрица корреляций всех переменных',
          fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('03_heatmap.png', dpi=300, bbox_inches='tight')
plt.close()
print("✅ 03_heatmap.png")

# Chart 4: box plot of the target grouped by a category.
print("📦 Создаю График 4: Boxplot…")

# Look for a categorical column; the first one found is used for grouping.
cat_cols = df.select_dtypes(include=['object', 'category']).columns
if len(cat_cols) > 0:
    cat_col = cat_cols[0]
    # Keep only the five most frequent categories.
    top_cats = df[cat_col].value_counts().head(5).index
    df_filtered = df[df[cat_col].isin(top_cats)]

    plt.figure(figsize=(12, 7))
    # Draw on the figure created above; without `ax`, pandas opens its own
    # figure and the one above is left empty and never closed.
    df_filtered.boxplot(column=target_col, by=cat_col, ax=plt.gca(),
                        patch_artist=True, fontsize=10)
    plt.suptitle('')  # remove the automatic "grouped by" super-title
    plt.title(f'📦 График 4: {target_col} по группам {cat_col}',
              fontsize=14, fontweight='bold')
    plt.ylabel(target_col)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('04_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close()
    print(f"✅ 04_boxplot.png ({cat_col})")
else:
    # No categorical columns: group the first feature by target quartiles.
    # This adds a 'quartile' column to df.
    df['quartile'] = pd.qcut(df[target_col], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])
    plt.figure(figsize=(10, 7))
    df.boxplot(column=features[0], by='quartile', ax=plt.gca())
    plt.title(f'📦 График 4: {features[0]} по квартилям {target_col}',
              fontsize=14, fontweight='bold')
    plt.suptitle('')
    plt.tight_layout()
    plt.savefig('04_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✅ 04_boxplot.png (квартили)")

Aksinya Savchenko

data visualization

We use cookies to improve the operation of the website and to enhance its usability. More detailed information on the use of cookies can be fo...