# sml
import statsmodels.api as sm
# Two-proportion z-test: compare the conversion rates of two groups.
# Group 1 has x1 successes out of n1 observations; group 2 has x2 out of n2.
x1 = 120
n1 = 1000
x2 = 150
n2 = 1200
count = [x1, x2]
nobs = [n1, n2]

# Called with default options; per the statsmodels documentation that is a
# two-sided test of equal proportions.
z_stat, p_value = sm.stats.proportions_ztest(count, nobs)
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decide at the 5% significance level.
if p_value < 0.05:
    print("Result: Reject the null hypothesis. There is a statistically significant difference in conversion rates.")
else:
    print("Result: Fail to reject the null hypothesis. No significant difference in conversion rates.")
# Welch's two-sample t-test computed from summary statistics
# (sample mean, sample standard deviation, and sample size per group).
mean_a = 1000
std_a = 100
n_a = 30
mean_b = 950
std_b = 120
n_b = 30

# Each group's contribution to the variance of the mean difference (s^2 / n).
var_term_a = (std_a ** 2) / n_a
var_term_b = (std_b ** 2) / n_b

mean_diff = mean_a - mean_b
se = (var_term_a + var_term_b) ** 0.5
t_stat = mean_diff / se

# Welch-Satterthwaite approximation of the degrees of freedom.
df_numerator = (var_term_a + var_term_b) ** 2
df_denominator = (var_term_a ** 2) / (n_a - 1) + (var_term_b ** 2) / (n_b - 1)
df = df_numerator / df_denominator

print(f"T-statistic: {t_stat:.4f}")
print(f"Approximate Degrees of Freedom: {df:.2f}")

# Hard-coded critical value; 2.004 is approximately the two-sided 5% cutoff
# of a t distribution with about 55 degrees of freedom.
# NOTE(review): this constant is not recomputed from `df`; update it if the
# inputs above change.
critical_value = 2.004
if abs(t_stat) > critical_value:
    print("Result: Reject H0 → Significant difference in sales.")
else:
    print("Result: Fail to reject H0 → No significant difference in sales.")
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
# OLS regression of Salary on a categorical Education level plus numeric
# Experience, fitted on a nine-row toy data set.
education_levels = ["High School", "Bachelor's", "Master's"]

data = pd.DataFrame(
    {
        "Salary": [40000, 50000, 60000, 70000, 80000, 55000, 65000, 75000, 85000],
        # The three levels repeat in the same order across the nine rows.
        "Education": education_levels * 3,
        "Experience": [2, 3, 4, 5, 6, 7, 3, 5, 8],
    }
)

# Fix the category order explicitly. "High School" is listed first, which
# presumably makes it the reference level for C(Education) — confirm against
# the coefficient names in the summary output.
data["Education"] = pd.Categorical(data["Education"], categories=education_levels)

salary_formula = "Salary ~ C(Education) + Experience"
model = smf.ols(salary_formula, data=data).fit()
print(model.summary())
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Piecewise-linear (single-knot spline) regression of price on square footage.
data = {
    "sqft": [1500, 1700, 1900, 2000, 2100, 2300, 2500],
    "price": [200000, 220000, 240000, 260000, 290000, 320000, 350000],
}
df = pd.DataFrame(data)

# Hinge term: 0 up to 2000 sqft, then the excess above 2000, so the fitted
# slope is allowed to change at that point.
above_knot = df["sqft"] > 2000
df["spline"] = np.where(above_knot, df["sqft"] - 2000, 0)

# Response and design matrix (intercept + sqft + hinge term).
y = df["price"]
X = sm.add_constant(df[["sqft", "spline"]])

model = sm.OLS(y, X).fit()
print(model.summary())
import numpy as np
import pandas as pd
# Log-linear (Poisson-style) rate calculation: evaluate the expected count
# lambda = exp(intercept + beta_age * Age + beta_condition * Condition)
# for two profiles and compare them.
intercept = 2.5
beta_age = -0.03
beta_condition = 0.5

# Two rows with the same Age (60) that differ only in the Condition flag.
data = pd.DataFrame({"Age": [60, 60], "Condition": [1, 0]})

linear_predictor = intercept + beta_age * data["Age"] + beta_condition * data["Condition"]
data["log_lambda"] = linear_predictor
data["lambda"] = np.exp(data["log_lambda"])

# Relative change of row 0 (Condition = 1) versus row 1 (Condition = 0), in percent.
lambda_with = data.loc[0, "lambda"]
lambda_without = data.loc[1, "lambda"]
increase_pct = ((lambda_with - lambda_without) / lambda_without) * 100

print(data[["Age", "Condition", "lambda"]])
print(f"\nIncrease in expected visits due to chronic condition: {increase_pct:.2f}%")
import math
from scipy import stats
# One-sample, lower-tailed t-test from summary statistics:
# H0: mean = mu_0 versus H1: mean < mu_0.
mu_0 = 200    # hypothesised mean under H0
x_bar = 190   # observed sample mean
s = 15        # sample standard deviation
n = 40        # sample size
alpha = 0.05  # significance level

t_stat = (x_bar - mu_0) / (s / math.sqrt(n))
df = n - 1

# Lower-tailed test: the rejection region is the left tail, so the critical
# value is the alpha-quantile and the p-value is the CDF at the statistic.
t_critical = stats.t.ppf(alpha, df)
p_value = stats.t.cdf(t_stat, df)

print(f"T-statistic: {t_stat:.3f}")
print(f"Critical t-value: {t_critical:.3f}")
print(f"P-value: {p_value:.5f}")

if t_stat < t_critical:
    print("Reject the null hypothesis: The new recipe has significantly fewer calories.")
else:
    print("Fail to reject the null hypothesis: Not enough evidence to support the claim.")
# Comments
# Post a Comment