/*
 * PDF is a very common format, but unlike plain text it cannot simply be edited with sed.
 * We can, however, look at how a PDF file is laid out and then delete the unwanted parts.
 * The Java program is as follows:
 */
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.BitSet;
import java.util.Locale;

/**
 * Deletes parts of a PDF file that contain a given keyword. Mainly aimed at books that
 * have had a watermark added.
 *
 * <p>The file is treated as raw bytes split on {@code '\n'}. Every line that contains the
 * keyword (case-insensitively) is removed; two special cases widen the removed range:
 * <ul>
 *   <li>a line ending in {@code )Tj}: the enclosing {@code BT\n ... ET\n} span is removed;</li>
 *   <li>a line starting with {@code /URI (}: the body between the preceding
 *       {@code obj\n<<} marker and the following {@code >>\nendobj} marker is removed.</li>
 * </ul>
 *
 * <p>NOTE(review): bytes are removed without rewriting any offsets or length fields that
 * the file may contain elsewhere; confirm the output opens in your PDF reader.
 *
 * <p>NOTE(review): only lines terminated by {@code '\n'} are examined; a final line
 * without a trailing newline is never checked.
 *
 * @author loganliu
 */
public class WatermarkRemover {

    /** Marker searched backwards from a {@code /URI (} line. */
    private static final byte[] OBJ_START = "obj\n<<".getBytes(StandardCharsets.ISO_8859_1);

    /** Marker searched forwards from a {@code /URI (} line. */
    private static final byte[] OBJ_END = ">>\nendobj".getBytes(StandardCharsets.ISO_8859_1);

    /** Marker searched backwards from a {@code )Tj} line (PDF begin-text operator on its own line). */
    private static final byte[] TEXT_BEGIN = "BT\n".getBytes(StandardCharsets.ISO_8859_1);

    /** Marker searched forwards from a {@code )Tj} line (PDF end-text operator on its own line). */
    private static final byte[] TEXT_END = "ET\n".getBytes(StandardCharsets.ISO_8859_1);

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code java WatermarkRemover filename keywords [newfilename]}. When
     * {@code newfilename} is omitted the input file is overwritten.
     *
     * <p>The output is written only after the input was read and processed successfully,
     * so a failure never replaces the input file with an empty one. Exits with status 1 on
     * bad usage and status 2 on an I/O error.
     *
     * @param args input file, keyword, and optionally the output file
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err
                    .println("\n对于filename文件,删除含有keywords(不区分大小写)的obj\n\tjava WatermarkRemover filename keywords [newfilename]");
            System.exit(1);
        }
        String newFn = args.length > 2 ? args[2] : args[0];
        try {
            byte[] ret = new WatermarkRemover().deleteWatermark(args[0], args[1]);
            Files.write(Paths.get(newFn), ret);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(2);
        }
    }

    /**
     * Tests whether {@code a} occurs in {@code b} at offset {@code start}.
     *
     * @param a     the pattern to look for
     * @param b     the data to look in
     * @param start offset into {@code b} at which the pattern should begin
     * @return {@code true} if every byte of {@code a} equals the corresponding byte of
     *         {@code b}; {@code false} on any mismatch or if the pattern would extend
     *         outside {@code b}
     */
    boolean arrayEquals(byte[] a, byte[] b, int start) {
        return matchesAt(a, b, start);
    }

    /**
     * Reads the file {@code fn} and returns its content with the watermark parts removed.
     *
     * @param fn  path of the PDF file to read
     * @param str keyword identifying the watermark (matched case-insensitively)
     * @return the filtered file content
     * @throws IOException if the file cannot be read
     */
    private byte[] deleteWatermark(String fn, String str) throws IOException {
        return removeWatermark(Files.readAllBytes(Paths.get(fn)), str);
    }

    /**
     * Returns a copy of {@code bs} without the lines (or enclosing spans, see the class
     * comment) that contain {@code keyword}.
     *
     * @param bs      raw file content; not modified
     * @param keyword keyword to look for, matched case-insensitively; an empty keyword
     *                removes nothing
     * @return the filtered bytes
     */
    static byte[] removeWatermark(byte[] bs, String keyword) {
        // Lines are decoded as ISO-8859-1 so that every byte maps to exactly one char and
        // binary data cannot swallow neighbouring ASCII bytes. The keyword is mapped the
        // same way via its UTF-8 bytes, so a non-ASCII keyword still matches its UTF-8
        // byte sequence in the file.
        String needle = new String(keyword.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1)
                .toLowerCase(Locale.ROOT);
        if (needle.isEmpty()) {
            // An empty needle is contained in every line and would delete everything.
            return bs.clone();
        }

        BitSet ignoreBits = new BitSet(bs.length);
        // 'from' is the index of the newline that precedes the current line (0 for the
        // first line), so every line after the first is decoded with a leading '\n'.
        int from = 0;
        for (int i = 0; i < bs.length; i++) {
            if (bs[i] != '\n') {
                continue;
            }
            String line = new String(bs, from, i - from, StandardCharsets.ISO_8859_1);
            if (line.toLowerCase(Locale.ROOT).contains(needle)) {
                markForRemoval(bs, line, from, i, ignoreBits);
            }
            from = i;
        }

        ByteArrayOutputStream bo = new ByteArrayOutputStream(bs.length);
        for (int i = 0; i < bs.length; i++) {
            if (!ignoreBits.get(i)) {
                bo.write(bs[i]);
            }
        }
        return bo.toByteArray();
    }

    /**
     * Marks the bytes to drop for one matching line. Falls back to dropping just the line
     * itself when the enclosing markers cannot be found, instead of deleting everything up
     * to the start or end of the file.
     */
    private static void markForRemoval(byte[] bs, String line, int from, int i, BitSet ignoreBits) {
        if (line.endsWith(")Tj")) {
            int begin = lastIndexOf(bs, TEXT_BEGIN, i);
            int end = indexOf(bs, TEXT_END, i);
            if (begin >= 0 && end >= 0) {
                // Remove from 'B' of "BT\n" through the newline that follows "ET".
                ignoreBits.set(begin, end + TEXT_END.length);
                return;
            }
        } else if (line.startsWith("\n/URI (")) {
            int objStart = lastIndexOf(bs, OBJ_START, i);
            int objEnd = indexOf(bs, OBJ_END, i);
            // Skip the marker plus one more byte (presumably the newline after "<<"), so
            // that the "obj\n<<" ... ">>\nendobj" shell itself is kept.
            int bodyStart = objStart + OBJ_START.length + 1;
            if (objStart >= 0 && objEnd >= bodyStart) {
                ignoreBits.set(bodyStart, objEnd);
                return;
            }
        }
        // Plain match: drop the line together with the newline that precedes it.
        ignoreBits.set(from, i);
    }

    /** Bounds-checked comparison of {@code pattern} against {@code data} at {@code start}. */
    private static boolean matchesAt(byte[] pattern, byte[] data, int start) {
        if (start < 0 || start > data.length - pattern.length) {
            return false;
        }
        for (int i = 0; i < pattern.length; i++) {
            if (pattern[i] != data[start + i]) {
                return false;
            }
        }
        return true;
    }

    /** Returns the largest index {@code <= fromIndex} at which {@code pattern} occurs, or -1. */
    private static int lastIndexOf(byte[] data, byte[] pattern, int fromIndex) {
        for (int s = Math.min(fromIndex, data.length - pattern.length); s >= 0; s--) {
            if (matchesAt(pattern, data, s)) {
                return s;
            }
        }
        return -1;
    }

    /** Returns the smallest index {@code >= fromIndex} at which {@code pattern} occurs, or -1. */
    private static int indexOf(byte[] data, byte[] pattern, int fromIndex) {
        for (int s = Math.max(fromIndex, 0); s <= data.length - pattern.length; s++) {
            if (matchesAt(pattern, data, s)) {
                return s;
            }
        }
        return -1;
    }
}
/*
 * Compile:
 *   javac WatermarkRemover.java
 * Run:
 *   java WatermarkRemover a.pdf "it-ebooks.info" b.pdf
 * Result:
 */