Haynes Lab:Notebook/Synthetic Biology and Bioinformatics for Predictable Control of Therapeutic Genes/2012/07/23: Difference between revisions

From OpenWetWare
Jump to navigationJump to search
(Autocreate 2012/07/23 Entry for Haynes_Lab:Notebook/Synthetic_Biology_and_Bioinformatics_for_Predictable_Control_of_Therapeutic_Genes)
 
Line 6: Line 6:
| colspan="2"|
| colspan="2"|
<!-- ##### DO NOT edit above this line unless you know what you are doing. ##### -->
<!-- ##### DO NOT edit above this line unless you know what you are doing. ##### -->
==Entry title==
==Filter the RefSeq List==
* Insert content here...
* After much trial and error I was finally able to provide a "cleaned up" RefSeq list
* The problem with RefSeq is that a gene can have multiple mRNA IDs assigned to the same genomic interval. All of those extra mRNA IDs are transcribed to the same gene, so I needed to get rid of them.
* A macros called DeleteDuplicateRows in Excel VBA was executed on the list.
* The macros asks excel to read through the selected column (column 2, or the start column in the case) and delete the entire row (chromosome, start, end, mRNA ID, and strand) if it recognizes that a value is not unique
<br>
DeleteDuplicateRows Code:
<br>
Public Sub DeleteDuplicateRows()
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' DeleteDuplicateRows
' This will delete duplicate records, based on the Active Column. That is,
' if the same value is found more than once in the Active Column, all but
' the first (lowest row number) will be deleted.
'
' To run the macro, select the entire column you wish to scan for
' duplicates, and run this procedure.
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
<br>
Dim R As Long
Dim N As Long
Dim V As Variant
Dim Rng As Range
<br>
On Error GoTo EndMacro
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
<br>
<br>
Set Rng = Application.Intersect(ActiveSheet.UsedRange, _
                    ActiveSheet.Columns(ActiveCell.Column))
<br>
Application.StatusBar = "Processing Row: " & Format(Rng.Row, "#,##0")
<br>
N = 0
For R = Rng.Rows.Count To 2 Step -1
If R Mod 500 = 0 Then
    Application.StatusBar = "Processing Row: " & Format(R, "#,##0")
End If
<br>
V = Rng.Cells(R, 1).Value
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Note that COUNTIF works oddly with a Variant that is equal to vbNullString.
' Rather than pass in the variant, you need to pass in vbNullString explicitly.
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
If V = vbNullString Then
    If Application.WorksheetFunction.CountIf(Rng.Columns(1), vbNullString) > 1 Then
        Rng.Rows(R).EntireRow.Delete
        N = N + 1
    End If
Else
    If Application.WorksheetFunction.CountIf(Rng.Columns(1), V) > 1 Then
        Rng.Rows(R).EntireRow.Delete
        N = N + 1
    End If
End If
Next R
<br>
EndMacro:
<br>
Application.StatusBar = False
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
MsgBox "Duplicate Rows Deleted: " & CStr(N)
<br>
End Sub
<br>
*The end results filters the original list to nearly half its size.
*If you would like a copy of this list please contact carlyhom91@gmail.com





Revision as of 00:04, 24 July 2012

Project name <html><img src="/images/9/94/Report.png" border="0" /></html> Main project page
<html><img src="/images/c/c3/Resultset_previous.png" border="0" /></html>Previous entry<html>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</html>

Filter the RefSeq List

  • After much trial and error I was finally able to provide a "cleaned up" RefSeq list
  • The problem with RefSeq is that a gene can have multiple mRNA IDs assigned to the same genomic interval. All of those extra mRNA IDs are transcribed to the same gene, so I needed to get rid of them.
  • A macros called DeleteDuplicateRows in Excel VBA was executed on the list.
  • The macros asks excel to read through the selected column (column 2, or the start column in the case) and delete the entire row (chromosome, start, end, mRNA ID, and strand) if it recognizes that a value is not unique


DeleteDuplicateRows Code:
Public Sub DeleteDuplicateRows() '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ' DeleteDuplicateRows ' This will delete duplicate records, based on the Active Column. That is, ' if the same value is found more than once in the Active Column, all but ' the first (lowest row number) will be deleted. ' ' To run the macro, select the entire column you wish to scan for ' duplicates, and run this procedure. '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Dim R As Long Dim N As Long Dim V As Variant Dim Rng As Range
On Error GoTo EndMacro Application.ScreenUpdating = False Application.Calculation = xlCalculationManual

Set Rng = Application.Intersect(ActiveSheet.UsedRange, _

                   ActiveSheet.Columns(ActiveCell.Column))


Application.StatusBar = "Processing Row: " & Format(Rng.Row, "#,##0")
N = 0 For R = Rng.Rows.Count To 2 Step -1 If R Mod 500 = 0 Then

   Application.StatusBar = "Processing Row: " & Format(R, "#,##0")

End If
V = Rng.Cells(R, 1).Value '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ' Note that COUNTIF works oddly with a Variant that is equal to vbNullString. ' Rather than pass in the variant, you need to pass in vbNullString explicitly. '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' If V = vbNullString Then

   If Application.WorksheetFunction.CountIf(Rng.Columns(1), vbNullString) > 1 Then
       Rng.Rows(R).EntireRow.Delete
       N = N + 1
   End If

Else

   If Application.WorksheetFunction.CountIf(Rng.Columns(1), V) > 1 Then
       Rng.Rows(R).EntireRow.Delete
       N = N + 1
   End If

End If Next R
EndMacro:
Application.StatusBar = False Application.ScreenUpdating = True Application.Calculation = xlCalculationAutomatic MsgBox "Duplicate Rows Deleted: " & CStr(N)
End Sub

  • The end results filters the original list to nearly half its size.
  • If you would like a copy of this list please contact carlyhom91@gmail.com