sml

import statsmodels.api as sm

# --- Two-proportion z-test on conversion rates -------------------------------
# Number of conversions (x) and number of observations (n) in each group.
x1 = 120
n1 = 1000
x2 = 150
n2 = 1200

count = [x1, x2]
nobs = [n1, n2]

# proportions_ztest is called with its default options and returns the
# z statistic together with the p-value.
z_stat, p_value = sm.stats.proportions_ztest(count, nobs)

print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision at the 5% significance level.
if p_value < 0.05:
    print("Result: Reject the null hypothesis. There is a statistically significant difference in conversion rates.")
else:
    print("Result: Fail to reject the null hypothesis. No significant difference in conversion rates.")

# --- Welch's two-sample t-test from summary statistics -----------------------
# Sample mean, sample standard deviation and sample size of each group.
mean_a = 1000
std_a = 100
n_a = 30

mean_b = 950
std_b = 120
n_b = 30

# Per-group variance of the sample mean (s^2 / n); reused by both the
# standard error and the Welch-Satterthwaite degrees of freedom below.
var_a = (std_a ** 2) / n_a
var_b = (std_b ** 2) / n_b

mean_diff = mean_a - mean_b
se = (var_a + var_b) ** 0.5
t_stat = mean_diff / se

# Welch-Satterthwaite approximation of the degrees of freedom.
df_numerator = (var_a + var_b) ** 2
df_denominator = (var_a ** 2) / (n_a - 1) + (var_b ** 2) / (n_b - 1)
df = df_numerator / df_denominator

print(f"T-statistic: {t_stat:.4f}")
print(f"Approximate Degrees of Freedom: {df:.2f}")

# 2.004 is a hard-coded critical value (roughly the two-tailed 5% cutoff of
# the t distribution for the ~56 degrees of freedom computed above).
if abs(t_stat) > 2.004:
    print("Result: Reject H0 → Significant difference in sales.")
else:
    print("Result: Fail to reject H0 → No significant difference in sales.")

import pandas as pd

import statsmodels.api as sm

import statsmodels.formula.api as smf

# --- Linear regression of salary on education level and experience -----------
# One row per employee: (Salary, Education, Experience).
data = pd.DataFrame(
    [
        (40000, "High School", 2),
        (50000, "Bachelor's", 3),
        (60000, "Master's", 4),
        (70000, "High School", 5),
        (80000, "Bachelor's", 6),
        (55000, "Master's", 7),
        (65000, "High School", 3),
        (75000, "Bachelor's", 5),
        (85000, "Master's", 8),
    ],
    columns=["Salary", "Education", "Experience"],
)

# Fix the category order explicitly so that 'High School' is listed first.
data["Education"] = pd.Categorical(
    data["Education"],
    categories=["High School", "Bachelor's", "Master's"],
)

# Ordinary least squares with Education entered as a categorical term.
model = smf.ols(formula="Salary ~ C(Education) + Experience", data=data).fit()
print(model.summary())

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import statsmodels.api as sm

# --- Piecewise-linear regression of house price on square footage ------------
data = {
    "sqft": [1500, 1700, 1900, 2000, 2100, 2300, 2500],
    "price": [200000, 220000, 240000, 260000, 290000, 320000, 350000],
}
df = pd.DataFrame(data)

# Hinge term with a knot at 2000 sqft: zero at or below the knot,
# (sqft - 2000) above it.
df["spline"] = (df["sqft"] - 2000).clip(lower=0)

# Design matrix: intercept, sqft and the hinge term.
X = sm.add_constant(df[["sqft", "spline"]])
y = df["price"]

model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

import numpy as np

import pandas as pd

# --- Expected counts from a log-linear (Poisson-style) model -----------------
# Coefficients on the log scale.
intercept = 2.5
beta_age = -0.03
beta_condition = 0.5

# Two profiles of the same age: row 0 with the condition, row 1 without.
data = pd.DataFrame({"Age": [60, 60], "Condition": [1, 0]})

# Linear predictor, then back-transform to the expected count.
data["log_lambda"] = (
    intercept
    + data["Age"] * beta_age
    + data["Condition"] * beta_condition
)
data["lambda"] = np.exp(data["log_lambda"].to_numpy())

# Relative change of row 0 versus row 1, expressed in percent.
increase_pct = (
    (data.at[0, "lambda"] - data.at[1, "lambda"]) / data.at[1, "lambda"]
) * 100

print(data[["Age", "Condition", "lambda"]])
print(f"\nIncrease in expected visits due to chronic condition: {increase_pct:.2f}%")

import math

from scipy import stats

# --- One-sample, lower-tailed t-test -----------------------------------------
# Hypothesised mean, observed sample mean, sample standard deviation,
# sample size and significance level.
mu_0 = 200
x_bar = 190
s = 15
n = 40
alpha = 0.05

# Test statistic: distance of the sample mean from mu_0 in standard errors.
t_stat = (x_bar - mu_0) / (s / math.sqrt(n))
df = n - 1

# Lower-tail critical value and p-value from the t distribution with df
# degrees of freedom.
t_critical = stats.t.ppf(alpha, df)
p_value = stats.t.cdf(t_stat, df)

print(f"T-statistic: {t_stat:.3f}")
print(f"Critical t-value: {t_critical:.3f}")
print(f"P-value: {p_value:.5f}")

# Reject when the statistic falls below the lower-tail critical value.
if t_stat < t_critical:
    print("Reject the null hypothesis: The new recipe has significantly fewer calories.")
else:
    print("Fail to reject the null hypothesis: Not enough evidence to support the claim.")

Search This Blog

BDA

sml

Comments

Post a Comment

Popular posts from this blog

pp