All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: “ACGAATTCCG”. When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = “AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT”,
Return:
[“AAAAACCCCC”, “CCCCCAAAAA”].
- 解题思路
按照提示,采用Hash Table
和Bit Manipulation
来处理。
备注:采用hash_map
来做时,自己电脑上可以编译通过,但是LeetCode提示不存在hash_map
对象。故改为使用map
,但效率较低。 - 实现代码
/*************************************************************
* @Author : 楚兴
* @Date : 2015/2/8 11:07
* @Status : Accepted
* @Runtime : 238 ms
*************************************************************/
#include <vector>
#include <iostream>
#include <string>
#include <map>
using namespace std;
class Solution {
public:
vector<string> findRepeatedDnaSequences(string s) {
vector<string> result;
if (s.length() <= 10)
{
return result;
}
map<int,int> mymap;
int i = 0;
int cursor = 0;
while (i < 9)
{
cursor = cursor << 3 | s.at(i) & 7; //优先级顺序<<、&、|
i++;
}
int mask = 0x7FFFFFF;
while (i < s.length())
{
//cursor & mask得到27bit
cursor = (cursor & mask) << 3 | s.at(i) & 7;
i++;
auto it = mymap.find(cursor);
if (it != mymap.end())
{
int count = (*it).second;
if (count == 1)
{
result.push_back(s.substr(i - 10, 10));
}
get<1>(*it) = count + 1; //更改second的值
}
else
{
mymap.insert(make_pair(cursor,1));
}
}
return result;
}
};
int main()
{
Solution s;
string str = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
vector<string> result = s.findRepeatedDnaSequences(str);
for (auto it = result.begin(); it != result.end(); it++)
{
cout<<(*it).c_str()<<endl;
}
system("pause");
}