/**
 * Reads a sequence file and validates its content.
 *
 * <p>The first line of the file is treated as the record name and every
 * following line is concatenated into the sequence. The name is checked by
 * {@link #IsNameCorret(String)} and the sequence must consist solely of the
 * lower-case letters {@code a c g t u r y m k w s b d h v n}.
 */
public class FileContentCompare {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\Desktop\\xxx\\xxx.txt";

    /** Charset name used to decode the input file. */
    private static final String INPUT_CHARSET = "BIG5";

    /** Index inside the first name section at which the number part begins. */
    private static final int BCRC_NO_START_POS = 5;

    /** Prefix expected (case-insensitively) at indexes 1..4 of the first name section. */
    private static final String BCRC_NO_PREFIX = "BCRC";

    /** Regular expression a whole sequence has to match. */
    private static final String SEQ_PATTERN = "^[acgturymkwsbdhvn]+$";

    private static final String NAME_FORMAT_MESSAGE =
            "name is not correct format: BCRC12345_LOTNO_STORAGE_SEQTYPE OR BCRC12345_LOTNO_SEQTYPE ";

    private static final String BCRC_NO_MESSAGE = "bcrcNo not correct : format:BCRC12345";

    /** Accepted values for the sequence-type section of a name. */
    private static final HashSet<String> SEQ_TYPES = setOf("ITS", "16S");

    /** Accepted values for the storage-type section of a name. */
    private static final HashSet<String> STORAGE_TYPES = setOf("LN", "FD", "FV");

    /**
     * Validates one sequence file and prints the parsed name fields, the name,
     * the sequence and the validation flag to standard output.
     *
     * <p>I/O failures (missing file, unsupported charset, read errors) are
     * reported with a stack trace and do not propagate.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, the built-in default path is used.
     * @throws IllegalArgumentException if the name line or the sequence is invalid
     * @throws Exception kept in the signature for compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        // Both the raw stream and the reader are declared as resources so the
        // file handle is released even if the charset lookup fails.
        try (FileInputStream in = new FileInputStream(new File(path));
                BufferedReader br = new BufferedReader(new InputStreamReader(in, INPUT_CHARSET))) {

            // readLine() yields null for an empty file; IsNameCorret rejects that.
            String name = br.readLine();
            HashMap<String, String> fields = IsNameCorret(name);
            for (String key : fields.keySet()) {
                System.out.println(key + " : " + fields.get(key));
            }

            // Line terminators are dropped: the sequence is the plain
            // concatenation of all remaining lines.
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }

            String seq = sb.toString();
            boolean isFormatCorrect = seq.matches(SEQ_PATTERN);
            if (!isFormatCorrect) {
                throw new IllegalArgumentException(
                        "seq not correct : sequence content only accept a, c, g, t, u, r, y, m, k, w, s, b, d, h, v, n");
            }

            System.out.println("name: " + name);
            System.out.println("seq: " + seq);
            System.out.println("isFormatCorrect: " + isFormatCorrect);
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException as well.
            e.printStackTrace();
        }
    }

    /**
     * Splits a record name on {@code '_'} and validates its sections.
     *
     * <p>A name has either three sections (bcrc, lotNo, seqType) or four
     * (bcrc, lotNo, storageType, seqType). The characters at indexes 1..4 of
     * the first section must equal {@code BCRC} ignoring case; everything from
     * index 5 onward is stored as {@code BCRC_NO}.
     *
     * <p>NOTE(review): the prefix comparison starts at index 1, so the first
     * character of the name is skipped. Presumably a one-character record
     * marker (for example {@code '>'}) precedes the name - confirm against
     * real input files, because the format quoted in the error messages shows
     * no such marker.
     *
     * @param name the first line of the input file; may be {@code null}
     * @return map with keys {@code BCRC_NO}, {@code LOT_NO}, {@code SEQ_TYPE}
     *         and, for four-section names, {@code STORAGE_TYPE}
     * @throws IllegalArgumentException if {@code name} is {@code null}, has a
     *         wrong number of sections, a wrong prefix, or an unknown storage
     *         or sequence type
     * @throws Exception kept in the signature for compatibility
     */
    private static HashMap<String, String> IsNameCorret(String name) throws Exception {
        if (name == null) {
            throw new IllegalArgumentException(NAME_FORMAT_MESSAGE);
        }

        String[] nameSec = name.split("_");
        if (nameSec.length < 3 || nameSec.length > 4) {
            throw new IllegalArgumentException(NAME_FORMAT_MESSAGE);
        }

        // Guard the length first so a short section is reported as a format
        // error rather than surfacing as StringIndexOutOfBoundsException.
        String bcrcSection = nameSec[0];
        if (bcrcSection.length() < BCRC_NO_START_POS
                || !bcrcSection.substring(1, BCRC_NO_START_POS).equalsIgnoreCase(BCRC_NO_PREFIX)) {
            throw new IllegalArgumentException(BCRC_NO_MESSAGE);
        }

        HashMap<String, String> m = new HashMap<String, String>();
        if (nameSec.length == 3) { // bcrc, lotNo, seqType
            if (!SEQ_TYPES.contains(nameSec[2])) {
                throw new IllegalArgumentException(nameSec[2] + " is not correct seqType format");
            }
            m.put("BCRC_NO", bcrcSection.substring(BCRC_NO_START_POS));
            m.put("LOT_NO", nameSec[1]);
            m.put("SEQ_TYPE", nameSec[2]);
        } else { // bcrc, lotNo, storageType, seqType
            if (!STORAGE_TYPES.contains(nameSec[2])) {
                throw new IllegalArgumentException(nameSec[2] + " is not correct storageType format");
            }
            if (!SEQ_TYPES.contains(nameSec[3])) {
                throw new IllegalArgumentException(nameSec[3] + " is not correct seqTypeSet format");
            }
            m.put("BCRC_NO", bcrcSection.substring(BCRC_NO_START_POS));
            m.put("LOT_NO", nameSec[1]);
            m.put("STORAGE_TYPE", nameSec[2]);
            m.put("SEQ_TYPE", nameSec[3]);
        }
        return m;
    }

    /** Builds a set holding exactly the given values. */
    private static HashSet<String> setOf(String... values) {
        HashSet<String> set = new HashSet<String>();
        for (String value : values) {
            set.add(value);
        }
        return set;
    }
}