% File "Fit_Main" is used as a computing and plotting space for the
% problem at hand, and calls functions as needed.

% Note: For clarity's sake, the wild-type, mutant 1, and mutant 2 variables are kept 
% separate throughout the code for Parts 1 and 2. However, they could readily be bundled 
% into multidimensional matrices for cleaner code, as in Part 3.

clear all;  % rid old parameter values, etc.

%% INPUT DATA BELOW

% Ligand concentrations (L) should all be in uM.
% Input normalized fluorescence signals (S) rather than raw data. To avoid divide
% by zero errors, put 0.9999 and 0.0001 in place of 1 and 0, respectively.
% "wt" refers to wild-type, and "m" to mutant. 

L= [x x x x x x x x x x x x];  

S_wt = [x x x x x x x x x x x x];
S_m1 = [x x x x x x x x x x x x];
S_m2 = [x x x x x x x x x x x x];

% Below we initialize an array of ligand concentrations values for use in
% our models. The values are in the appropriate range to compare fits to data.

L_space = logspace(-4, 4, 10000);    

%% PART 1: Fit for single KD value using a simple model.

% This model ignores the fact of multiple binding sites, but will provide a 
% useful introduction to our modeling methods.

C0 = 0;                              % initialize KD parameter as zero.

% Binding fractions are calculated from fluorescence below. 
% Why can't the fluorescence data be used directly?

Y_wt = 1 - S_wt;
Y_m1 = 1 - S_m1;
Y_m2 = 1 - S_m2;

% Now we use the nonlinear fitting tool, nlinfit, to determine KD
% values, according to the model in function "Fit_SingleKD."
% The parameter C_x returns KD, and r returns residuals. Residuals show
% the difference between the model and data for any given data point.

[C_wt, r_wt] = nlinfit(L, Y_wt, @Fit_SingleKD, C0);
[C_m1, r_m1] = nlinfit(L, Y_m1, @Fit_SingleKD, C0);
[C_m2, r_m2] = nlinfit(L, Y_m2, @Fit_SingleKD, C0);

% Output KD values so they show up in the command window.

KD1_wt = C_wt
KD1_m1 = C_m1
KD1_m2 = C_m2

% Using the same model as in "Fit_SingleKD," we prepare a vector of 
% values for the binding fraction, using the chosen ligand-space. 
% Then we can compare the fit to the raw signal data.

fitY_wt = L_space./(L_space + KD1_wt);
fitY_m1 = L_space./(L_space + KD1_m1);
fitY_m2 = L_space./(L_space + KD1_m2);

% Finally, the wild-type and mutant data points and model curves are plotted.
% Try changing the legend to reflect the names of your mutants (X#Z).

figure
subplot(3,1,1)
  semilogx(L, Y_wt, 'x')
hold on
  semilogx(L_space, fitY_wt)
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('wild type data', 'wild type fit')
title('Part 1: Simple KD fit')
subplot(3,1,2)
  semilogx(L, Y_m1, '+')
hold on
  semilogx(L_space, fitY_m1, ':')
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('mut1 data', 'mut1 fit')
subplot(3,1,3)
  semilogx(L, Y_m2, 'o')
hold on
  semilogx(L_space, fitY_m2, '-')
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('mut2 data', 'mut2 fit')

% The residuals are plotted as well, to give a sense of the accuracy
% and appropriateness of the model.

figure
  semilogx(L, r_wt, 'x')
hold on
  semilogx(L, r_m1, '+')
hold on
  semilogx(L, r_m2, 'o')
hold on
  semilogx(L_space, 0, ':')
title('Residuals from fitting binding data')
xlabel('log [L] (\mu M)')
ylabel('residual')
legend('wild type', 'mutant 1', 'mutant 2')

%% PART 2: Model for single KD plus Hill coefficient, Method 1

% We will now use the same strategy as above, but we will call a
% function that should more appropriately model the behaviour of
% inverse pericam, namely "Fit_KDn," a two-parameter model. The 
% parameter KDn_x returns a value for KD as well as one for the 
% Hill coefficient n. As before, r2 returns residuals. 

C= [0 1];

[KDn_wt, r2_wt] = nlinfit(L, Y_wt, @Fit_KDn, C);
[KDn_m1, r2_m1] = nlinfit(L, Y_m1, @Fit_KDn, C);
[KDn_m2, r2_m2] = nlinfit(L, Y_m2, @Fit_KDn, C);

% Output KD and n values so they show up in the command window.

KD2_wt = KDn_wt(1)
n_wt = KDn_wt(2)

KD2_m1 = KDn_m1(1)
n_m1 = KDn_m1(2)

KD2_m2 = KDn_m2(1)
n_m2 = KDn_m2(2)

% Using the same model as in "Fit_KDn," we prepare a vector of 
% values for the binding fraction, using the chosen ligand-space. 
% Then we can compare the fit to the raw signal data.

fitY2_wt = (L_space.^n_wt)./(L_space.^n_wt + KD2_wt.^n_wt);
fitY2_m1 = (L_space.^n_m1)./(L_space.^n_m1 + KD2_m1.^n_m1);
fitY2_m2 = (L_space.^n_m2)./(L_space.^n_m2 + KD2_m2.^n_m2);

% Finally, the wild-type and mutant data points and model curves are plotted.

figure
subplot(3,1,1)
  semilogx(L, Y_wt, 'x')
hold on
  semilogx(L_space, fitY2_wt)
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('wild type data', 'wild type fit')
title('Part 2: Fit for KD and n')
subplot(3,1,2)
  semilogx(L, Y_m1, '+')
hold on
  semilogx(L_space, fitY2_m1, ':')
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('mut1 data', 'mut1 fit')
subplot(3,1,3)
  semilogx(L, Y_m2, 'o')
hold on
  semilogx(L_space, fitY2_m2, '-')
xlabel('log [L] (\mu M)')
ylabel('binding Y')
legend('mut2 data', 'mut2 fit')

% The residuals are plotted as well, to give a sense of the accuracy
% and appropriateness of the model.

figure
  semilogx(L, r2_wt, 'x')
hold on
  semilogx(L, r2_m1, '+')
hold on
  semilogx(L, r2_m2, 'o')
hold on
  semilogx(L_space, 0, ':')
title('Residuals from fitting binding data')
xlabel('log [L] (\mu M)')
ylabel('residual')
legend('wild type', 'mutant 1', 'mutant 2')

%% PART 3: Model for single KD plus Hill coefficient, Method 2

% One form of the Hill equation reads: log(Y/(1-Y)) = n log(L) - n log(KA)
% where n is the Hill coefficient, and KA is the apparent KD.

% The binding data must be transformed into a form appropriate for the
% Hill equation, namely: Y/(Y-1). 
% Rather than continue to use three separate variables for everything,
% we will begin to harness the matrix manipulation capabilities of MATLAB.
% Feel free to ask questions if you have trouble following along.

Y = [Y_wt; Y_m1; Y_m2];
Yp = Y./(1-Y);

% We now convert to base-10 logarithms convenient for use with the model.

logYp = log10(Yp);
logL = log10(L);

% The following loop creates several arrays and matrices. The loop has three
% iterations, one for each protein sample. Several steps occur per iteration:
% 1. A degree-1 (linear) polynomial "p" that fits the log-log Yp vs. L data
% is determined using the least-squares function polyfit. 
% 2. The outputs are explicitly re-named as intercept and slope of a model line.
% 3. The Hill coefficient and apparent KD are then calculated.
% 4. Now, the fitted values "f" are gotten using the function polyval. This
% is similar to getting the Yhat vector with nlinfit in the previous part. 
% 5. Finally, the fitted values are converted back to non-logarithmic form.

% Note: colons indicate that something should be done for the entire row 
% (or column, depending on placement) of a particular matrix. 

for i=1:3
p(i,:) = polyfit(logL, logYp(i,:), 1);
intercept = p(i,1)
slope = p(i,2)
Hill(i) = slope
KA(i) = 10^(-intercept/Hill(i))
f(i,:) = polyval(p(i,:), logL);
Yp_fit(i,:) = 10.^(f(i,:));
end

% A figure with two plots is made.

% First, a linear plot of Y vs. L is made to show whether it is sigmoidal
% (indicating cooperativity) or asymptotic (indicating independence).
% A narrow x-axis range is used for better visibility.

figure
subplot(2,3,1)
plot(L, Y(1,:), 'x')
title('Part 3, binding plots')
%axis([0 1 0 1])
xlabel('{\it [L]} \mu M')
ylabel('binding fraction {\it y}')
subplot(2,3,2)
plot(L, Y(2,:), '+')
%axis([0 1 0 1])
xlabel('{\it [L]} \mu M')
ylabel('binding fraction {\it y}')
subplot(2,3,3)
plot(L, Y(3,:), 'o')
%axis([0 1 0 1])
xlabel('{\it [L]} \mu M')
ylabel('binding fraction {\it y}')

% Next, a log-log plot of the Yp vs. L data is overlaid with a log-log plot 
% of the fitted Yp values vs. L.

subplot(2,3,4)
loglog(L, Yp(1,:), 'x')
hold on
loglog(L, Yp_fit(1,:))
title('Part 3, Hill plots')
xlabel('log_{10} {\it [L]} \mu M')
ylabel('log_{10} ({\it Y} / ({1 - {\it Y}}))')
subplot(2,3,5)
loglog(L, Yp(2,:), '+')
hold on
loglog(L, Yp_fit(2,:), ':')
xlabel('log_{10} {\it [L]} \mu M')
ylabel('log_{10} ({\it Y} / ({1 - {\it Y}}))')
subplot(2,3,6)
loglog(L, Yp(3,:), 'o')
hold on
loglog(L, Yp_fit(3,:), '-')
xlabel('log_{10} {\it [L]} \mu M')
ylabel('log_{10} ({\it Y} / ({1 - {\it Y}}))')