Haynes Lab:Notebook/Synthetic Biology and Bioinformatics for Predictable Control of Therapeutic Genes/2012/07/23: Difference between revisions
Caroline Hom (talk | contribs) (Autocreate 2012/07/23 Entry for Haynes_Lab:Notebook/Synthetic_Biology_and_Bioinformatics_for_Predictable_Control_of_Therapeutic_Genes) |
Caroline Hom (talk | contribs) |
||
Line 6: | Line 6: | ||
| colspan="2"| | | colspan="2"| | ||
<!-- ##### DO NOT edit above this line unless you know what you are doing. ##### --> | <!-- ##### DO NOT edit above this line unless you know what you are doing. ##### --> | ||
== | ==Filter the RefSeq List== | ||
* | * After much trial and error I was finally able to provide a "cleaned up" RefSeq list | ||
* The problem with RefSeq is that a gene can have multiple mRNA IDs assigned to the same genomic interval. All of those extra mRNA IDs are transcribed to the same gene, so I needed to get rid of them. | |||
* A macros called DeleteDuplicateRows in Excel VBA was executed on the list. | |||
* The macros asks excel to read through the selected column (column 2, or the start column in the case) and delete the entire row (chromosome, start, end, mRNA ID, and strand) if it recognizes that a value is not unique | |||
<br> | |||
DeleteDuplicateRows Code: | |||
<br> | |||
Public Sub DeleteDuplicateRows() | |||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' | |||
' DeleteDuplicateRows | |||
' This will delete duplicate records, based on the Active Column. That is, | |||
' if the same value is found more than once in the Active Column, all but | |||
' the first (lowest row number) will be deleted. | |||
' | |||
' To run the macro, select the entire column you wish to scan for | |||
' duplicates, and run this procedure. | |||
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' | |||
<br> | |||
Dim R As Long | |||
Dim N As Long | |||
Dim V As Variant | |||
Dim Rng As Range | |||
<br> | |||
On Error GoTo EndMacro | |||
Application.ScreenUpdating = False | |||
Application.Calculation = xlCalculationManual | |||
<br> | |||
<br> | |||
Set Rng = Application.Intersect(ActiveSheet.UsedRange, _ | |||
ActiveSheet.Columns(ActiveCell.Column)) | |||
<br> | |||
Application.StatusBar = "Processing Row: " & Format(Rng.Row, "#,##0") | |||
<br> | |||
N = 0 | |||
For R = Rng.Rows.Count To 2 Step -1 | |||
If R Mod 500 = 0 Then | |||
Application.StatusBar = "Processing Row: " & Format(R, "#,##0") | |||
End If | |||
<br> | |||
V = Rng.Cells(R, 1).Value | |||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' | |||
' Note that COUNTIF works oddly with a Variant that is equal to vbNullString. | |||
' Rather than pass in the variant, you need to pass in vbNullString explicitly. | |||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' | |||
If V = vbNullString Then | |||
If Application.WorksheetFunction.CountIf(Rng.Columns(1), vbNullString) > 1 Then | |||
Rng.Rows(R).EntireRow.Delete | |||
N = N + 1 | |||
End If | |||
Else | |||
If Application.WorksheetFunction.CountIf(Rng.Columns(1), V) > 1 Then | |||
Rng.Rows(R).EntireRow.Delete | |||
N = N + 1 | |||
End If | |||
End If | |||
Next R | |||
<br> | |||
EndMacro: | |||
<br> | |||
Application.StatusBar = False | |||
Application.ScreenUpdating = True | |||
Application.Calculation = xlCalculationAutomatic | |||
MsgBox "Duplicate Rows Deleted: " & CStr(N) | |||
<br> | |||
End Sub | |||
<br> | |||
*The end results filters the original list to nearly half its size. | |||
*If you would like a copy of this list please contact carlyhom91@gmail.com | |||
Revision as of 00:04, 24 July 2012
Project name | <html><img src="/images/9/94/Report.png" border="0" /></html> Main project page <html><img src="/images/c/c3/Resultset_previous.png" border="0" /></html>Previous entry<html> </html> |
Filter the RefSeq List
ActiveSheet.Columns(ActiveCell.Column))
Application.StatusBar = "Processing Row: " & Format(R, "#,##0") End If
If Application.WorksheetFunction.CountIf(Rng.Columns(1), vbNullString) > 1 Then Rng.Rows(R).EntireRow.Delete N = N + 1 End If Else If Application.WorksheetFunction.CountIf(Rng.Columns(1), V) > 1 Then Rng.Rows(R).EntireRow.Delete N = N + 1 End If End If
Next R
|