BamTools  2.5.2
Sort.h
Go to the documentation of this file.
1 // ***************************************************************************
2 // Sort.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 4 April 2012 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides sorting functionality.
9 // ***************************************************************************
10 
11 #ifndef ALGORITHMS_SORT_H
12 #define ALGORITHMS_SORT_H
13 
14 #include <algorithm>
15 #include <cassert>
16 #include <functional>
17 #include <string>
18 #include <vector>
19 #include "api/BamAlignment.h"
20 #include "api/BamMultiReader.h"
21 #include "api/BamReader.h"
22 #include "api/api_global.h"
23 
24 namespace BamTools {
25 namespace Algorithms {
26 
30 struct API_EXPORT Sort
31 {
32 
34  enum Order
35  {
36  AscendingOrder = 0,
37  DescendingOrder
38  };
39 
45  template <typename ElemType>
46  static bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs)
47  {
48  switch (order) {
49  case (Sort::AscendingOrder): {
50  std::less<ElemType> comp;
51  return comp(lhs, rhs);
52  }
53  case (Sort::DescendingOrder): {
54  std::greater<ElemType> comp;
55  return comp(lhs, rhs);
56  }
57  default:
58  BT_ASSERT_UNREACHABLE;
59  }
60  return false; // <-- unreachable
61  }
62 
64  typedef std::binary_function<BamAlignment, BamAlignment, bool> AlignmentSortBase;
65 
82  struct ByName : public AlignmentSortBase
83  {
84 
85  // ctor
87  : m_order(order)
88  {}
89 
90  // comparison function
91  bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) const
92  {
93  return sort_helper(m_order, lhs.Name, rhs.Name);
94  }
95 
96  // used by BamMultiReader internals
97  static bool UsesCharData()
98  {
99  return true;
100  }
101 
102  // data members
103  private:
104  const Sort::Order m_order;
105  };
106 
123  struct ByPosition : public AlignmentSortBase
124  {
125 
126  // ctor
128  : m_order(order)
129  {}
130 
131  // comparison function
132  bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) const
133  {
134 
135  // force unmapped aligmnents to end
136  if (lhs.RefID == -1) {
137  return false;
138  }
139  if (rhs.RefID == -1) {
140  return true;
141  }
142 
143  // if on same reference, sort on position
144  if (lhs.RefID == rhs.RefID) {
145  return sort_helper(m_order, lhs.Position, rhs.Position);
146  }
147 
148  // otherwise sort on reference ID
149  return sort_helper(m_order, lhs.RefID, rhs.RefID);
150  }
151 
152  // used by BamMultiReader internals
153  static bool UsesCharData()
154  {
155  return false;
156  }
157 
158  // data members
159  private:
160  const Sort::Order m_order;
161  };
162 
179  template <typename T>
180  struct ByTag : public AlignmentSortBase
181  {
182 
183  // ctor
184  ByTag(const std::string& tag, const Sort::Order& order = Sort::AscendingOrder)
185  : m_tag(tag)
186  , m_order(order)
187  {}
188 
189  // comparison function
190  bool operator()(const BamTools::BamAlignment& lhs, const BamTools::BamAlignment& rhs) const
191  {
192 
193  // force alignments without tag to end
194  T lhsTagValue;
195  T rhsTagValue;
196  if (!lhs.GetTag(m_tag, lhsTagValue)) {
197  return false;
198  }
199  if (!rhs.GetTag(m_tag, rhsTagValue)) {
200  return true;
201  }
202 
203  // otherwise compare on tag values
204  return sort_helper(m_order, lhsTagValue, rhsTagValue);
205  }
206 
207  // used by BamMultiReader internals
208  static bool UsesCharData()
209  {
210  return true;
211  }
212 
213  // data members
214  private:
215  const std::string m_tag;
216  const Sort::Order m_order;
217  };
218 
230  struct Unsorted : public AlignmentSortBase
231  {
232 
233  // comparison function
235  {
236  return false; // returning false tends to retain insertion order
237  }
238 
239  // used by BamMultiReader internals
240  static bool UsesCharData()
241  {
242  return false;
243  }
244  };
245 
259  template <typename Compare>
260  static void SortAlignments(std::vector<BamAlignment>& data, const Compare& comp = Compare())
261  {
262  std::sort(data.begin(), data.end(), comp);
263  }
264 
280  template <typename Compare>
281  static std::vector<BamAlignment> SortAlignments(const std::vector<BamAlignment>& input,
282  const Compare& comp = Compare())
283  {
284  std::vector<BamAlignment> output(input);
285  SortAlignments(output, comp);
286  return output;
287  }
288 
309  template <typename Compare>
310  static std::vector<BamAlignment> GetSortedRegion(BamReader& reader, const BamRegion& region,
311  const Compare& comp = Compare())
312  {
313  // return empty container if unable to find region
314  if (!reader.IsOpen()) {
315  return std::vector<BamAlignment>();
316  }
317  if (!reader.SetRegion(region)) {
318  return std::vector<BamAlignment>();
319  }
320 
321  // iterate through region, grabbing alignments
322  BamAlignment al;
323  std::vector<BamAlignment> results;
324  while (reader.GetNextAlignmentCore(al)) {
325  results.push_back(al);
326  }
327 
328  // sort & return alignments
329  SortAlignments(results, comp);
330  return results;
331  }
332 
353  template <typename Compare>
354  static std::vector<BamAlignment> GetSortedRegion(BamMultiReader& reader,
355  const BamRegion& region,
356  const Compare& comp = Compare())
357  {
358  // return empty container if unable to find region
359  if (!reader.HasOpenReaders()) {
360  return std::vector<BamAlignment>();
361  }
362  if (!reader.SetRegion(region)) {
363  return std::vector<BamAlignment>();
364  }
365 
366  // iterate through region, grabbing alignments
367  BamAlignment al;
368  std::vector<BamAlignment> results;
369  while (reader.GetNextAlignmentCore(al)) {
370  results.push_back(al);
371  }
372 
373  // sort & return alignments
374  SortAlignments(results, comp);
375  return results;
376  }
377 };
378 
379 } // namespace Algorithms
380 } // namespace BamTools
381 
382 #endif // ALGORITHMS_SORT_H
The main BAM alignment data structure.
Definition: BamAlignment.h:34
bool GetTag(const std::string &tag, T &destination) const
Definition: BamAlignment.h:442
int32_t RefID
ID number for reference sequence.
Definition: BamAlignment.h:132
std::string Name
read name
Definition: BamAlignment.h:125
int32_t Position
position (0-based) where alignment starts
Definition: BamAlignment.h:133
Convenience class for reading multiple BAM files.
Definition: BamMultiReader.h:27
bool GetNextAlignmentCore(BamAlignment &alignment)
Retrieves next available alignment.
Definition: BamMultiReader.cpp:197
bool SetRegion(const BamRegion &region)
Sets a target region of interest.
Definition: BamMultiReader.cpp:413
bool HasOpenReaders() const
Returns true if there are any open BAM files.
Definition: BamMultiReader.cpp:245
Provides read access to BAM files.
Definition: BamReader.h:26
bool SetRegion(const BamRegion &region)
Sets a target region of interest.
Definition: BamReader.cpp:373
bool IsOpen() const
Returns true if a BAM file is open for reading.
Definition: BamReader.cpp:234
bool GetNextAlignmentCore(BamAlignment &alignment)
Retrieves next available alignment, without populating the alignment's string data fields.
Definition: BamReader.cpp:189
Contains all BamTools classes & methods.
Definition: Sort.h:24
Function object for comparing alignments by name.
Definition: Sort.h:83
ByName(const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:86
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs) const
Definition: Sort.h:91
Function object for comparing alignments by position.
Definition: Sort.h:124
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs) const
Definition: Sort.h:132
ByPosition(const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:127
Function object for comparing alignments by tag value.
Definition: Sort.h:181
ByTag(const std::string &tag, const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:184
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs) const
Definition: Sort.h:190
Placeholder function object.
Definition: Sort.h:231
bool operator()(const BamTools::BamAlignment &, const BamTools::BamAlignment &) const
Definition: Sort.h:234
Provides classes & methods related to sorting BamAlignments.
Definition: Sort.h:31
static void SortAlignments(std::vector< BamAlignment > &data, const Compare &comp=Compare())
Definition: Sort.h:260
static std::vector< BamAlignment > SortAlignments(const std::vector< BamAlignment > &input, const Compare &comp=Compare())
Definition: Sort.h:281
static std::vector< BamAlignment > GetSortedRegion(BamReader &reader, const BamRegion &region, const Compare &comp=Compare())
Definition: Sort.h:310
Order
Provides explicit values for specifying desired sort ordering.
Definition: Sort.h:35
@ DescendingOrder
Definition: Sort.h:37
@ AscendingOrder
Definition: Sort.h:36
static std::vector< BamAlignment > GetSortedRegion(BamMultiReader &reader, const BamRegion &region, const Compare &comp=Compare())
Definition: Sort.h:354
Represents a sequential genomic region.
Definition: BamAux.h:90