Bug 1285848 - Part 2: Request and parse RICE encoded prefix by default. r=francois

MozReview-Commit-ID: Cd0lT5VTM7t
This commit is contained in:
Henry Chang
2016-10-05 14:59:53 +08:00
parent 2e867131fd
commit ea02e6b8bd
4 changed files with 157 additions and 5 deletions

View File

@@ -168,6 +168,26 @@ TableUpdateV4::NewPrefixes(int32_t aSize, std::string& aPrefixes)
NS_ENSURE_TRUE_VOID(aPrefixes.size() % aSize == 0);
NS_ENSURE_TRUE_VOID(!mPrefixesMap.Get(aSize));
if (LOG_ENABLED() && 4 == aSize) {
int numOfPrefixes = aPrefixes.size() / 4;
uint32_t* p = (uint32_t*)aPrefixes.c_str();
// Dump the first/last 10 fixed-length prefixes for debugging.
LOG(("* The first 10 (maximum) fixed-length prefixes: "));
for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
uint8_t* c = (uint8_t*)&p[i];
LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
}
LOG(("* The last 10 (maximum) fixed-length prefixes: "));
for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
uint8_t* c = (uint8_t*)&p[i];
LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
}
LOG(("---- %d fixed-length prefixes in total.", aPrefixes.size() / aSize));
}
PrefixStdString* prefix = new PrefixStdString(aPrefixes);
mPrefixesMap.Put(aSize, prefix);
}

View File

@@ -13,6 +13,8 @@
#include "nsUrlClassifierUtils.h"
#include "nsPrintfCString.h"
#include "mozilla/Base64.h"
#include "RiceDeltaDecoder.h"
#include "mozilla/EndianUtils.h"
// MOZ_LOG=UrlClassifierProtocolParser:5
mozilla::LazyLogModule gUrlClassifierProtocolParserLog("UrlClassifierProtocolParser");
@@ -908,8 +910,8 @@ ProtocolParserProtobuf::ProcessAdditionOrRemoval(TableUpdateV4& aTableUpdate,
break;
case RICE:
// Not implemented yet (see bug 1285848),
NS_WARNING("Encoded table update is not supported yet.");
ret = (aIsAddition ? ProcessEncodedAddition(aTableUpdate, update)
: ProcessEncodedRemoval(aTableUpdate, update));
break;
}
}
@@ -977,6 +979,132 @@ ProtocolParserProtobuf::ProcessRawRemoval(TableUpdateV4& aTableUpdate,
return NS_OK;
}
// Decodes a Rice-delta-encoded list of integers into |aDecoded|.
//
// aEncoding  Message carrying the first value, the Rice parameter, the
//            number of encoded entries and the encoded bytes.
// aDecoded   On success holds num_entries() + 1 values: the first value
//            followed by the values written by RiceDeltaDecoder.
//
// Returns NS_OK on success; NS_ERROR_FAILURE if the encoding info is
// incomplete, the entry count is invalid, or the decoder reports failure;
// NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be allocated.
static nsresult
DoRiceDeltaDecode(const RiceDeltaEncoding& aEncoding,
                  nsTArray<uint32_t>& aDecoded)
{
  // Sanity check of the encoding info.
  if (!aEncoding.has_first_value() ||
      !aEncoding.has_rice_parameter() ||
      !aEncoding.has_num_entries() ||
      !aEncoding.has_encoded_data()) {
    PARSER_LOG(("The encoding info is incomplete."));
    return NS_ERROR_FAILURE;
  }

  // NOTE(review): the "%d" specifiers below assume int-sized accessors --
  // confirm against the protobuf definition of RiceDeltaEncoding.
  PARSER_LOG(("* Encoding info:"));
  PARSER_LOG((" - First value: %d", aEncoding.first_value()));
  PARSER_LOG((" - Num of entries: %d", aEncoding.num_entries()));
  PARSER_LOG((" - Rice parameter: %d", aEncoding.rice_parameter()));

  // The entry count is supplied by the remote end, so validate it before
  // it is used to size the output buffer.
  const auto numEntries = aEncoding.num_entries();
  if (numEntries < 0) {
    PARSER_LOG(("The number of entries is invalid."));
    return NS_ERROR_FAILURE;
  }

  // Setup the output buffer. The "first value" is included in
  // the output buffer. Allocate fallibly and compute the length in
  // size_t so that an oversized count is reported as an error instead
  // of overflowing or aborting the process.
  const size_t outputLength = static_cast<size_t>(numEntries) + 1;
  if (!aDecoded.SetLength(outputLength, mozilla::fallible)) {
    NS_WARNING("Not enough memory to decode the RiceDelta input.");
    return NS_ERROR_OUT_OF_MEMORY;
  }
  aDecoded[0] = aEncoding.first_value();

  // With no encoded entries the first value is the whole result, and
  // |&aDecoded[1]| would index past the end of the one-element array.
  if (numEntries == 0) {
    return NS_OK;
  }

  // Set up the input buffer. Note that the bits should be read
  // from LSB to MSB so that we in-place reverse the bits before
  // feeding to the decoder. That in-place modification is why the
  // constness of |aEncoding| is cast away here.
  auto encoded = const_cast<RiceDeltaEncoding&>(aEncoding).mutable_encoded_data();
  RiceDeltaDecoder decoder(
    reinterpret_cast<uint8_t*>(const_cast<char*>(encoded->c_str())),
    encoded->size());

  // Decode!
  const bool decodedOk =
    decoder.Decode(aEncoding.rice_parameter(),
                   aEncoding.first_value(), // first value.
                   numEntries, // # of entries (first value not included).
                   &aDecoded[1]);
  NS_ENSURE_TRUE(decodedOk, NS_ERROR_FAILURE);

  return NS_OK;
}
// Decodes the Rice-encoded hashes of |aAddition| and registers them on
// |aTableUpdate| as a string of 4-byte prefixes.
//
// Returns NS_OK when there are no Rice-encoded hashes or when the prefixes
// were handed to |aTableUpdate|; otherwise the error from DoRiceDeltaDecode.
nsresult
ProtocolParserProtobuf::ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
                                               const ThreatEntrySet& aAddition)
{
  if (!aAddition.has_rice_hashes()) {
    PARSER_LOG(("* No rice encoded addition."));
    return NS_OK;
  }

  nsTArray<uint32_t> decoded;
  nsresult rv = DoRiceDeltaDecode(aAddition.rice_hashes(), decoded);
  NS_ENSURE_SUCCESS(rv, rv);

  //  Say we have the following raw prefixes
  //                              BE            LE
  //   00 00 00 01                 1      16777216
  //   00 00 02 00               512        131072
  //   00 03 00 00            196608           768
  //   04 00 00 00          67108864             4
  //
  //  which can be treated as uint32 (big-endian) sorted in increasing order:
  //
  //  [1, 512, 196608, 67108864]
  //
  //  According to https://developers.google.com/safe-browsing/v4/compression,
  //  the following should be done prior to compression:
  //
  //  1) re-interpret in little-endian ==> [16777216, 131072, 768, 4]
  //  2) sort in increasing order       ==> [4, 768, 131072, 16777216]
  //
  //  In order to get the original byte stream from |decoded|
  //  ([4, 768, 131072, 16777216] in this case), we have to:
  //
  //  1) sort in big-endian order      ==> [16777216, 131072, 768, 4]
  //  2) copy each uint32 in little-endian to the result string
  //

  // The 4-byte prefixes have to be re-sorted in Big-endian increasing order.
  struct CompareBigEndian
  {
    // Each value is emitted below as its little-endian byte sequence, so
    // the required order is the numeric order of the byte-reversed value
    // regardless of the host's endianness. The reversal is therefore done
    // unconditionally rather than via a native-to-big-endian conversion,
    // which would leave the value untouched on a big-endian host.
    static uint32_t ReverseBytes(uint32_t aValue)
    {
      return (aValue >> 24) |
             ((aValue >> 8) & 0x0000FF00u) |
             ((aValue << 8) & 0x00FF0000u) |
             (aValue << 24);
    }

    bool Equals(const uint32_t& aA, const uint32_t& aB) const
    {
      return aA == aB;
    }

    bool LessThan(const uint32_t& aA, const uint32_t& aB) const
    {
      return ReverseBytes(aA) < ReverseBytes(aB);
    }
  };
  decoded.Sort(CompareBigEndian());

  // The encoded prefixes are always 4 bytes.
  const size_t kPrefixSize = 4;
  std::string prefixes;
  prefixes.reserve(decoded.Length() * kPrefixSize);
  for (size_t i = 0; i < decoded.Length(); i++) {
    // Note that the third argument is the number of elements we want
    // to copy (and swap) but not the number of bytes we want to copy.
    char p[kPrefixSize];
    NativeEndian::copyAndSwapToLittleEndian(p, &decoded[i], 1);
    prefixes.append(p, kPrefixSize);
  }

  aTableUpdate.NewPrefixes(4, prefixes);

  return NS_OK;
}
// Decodes the Rice-encoded indices of |aRemoval| and registers them on
// |aTableUpdate| as removal indices.
//
// Returns NS_OK when there are no Rice-encoded indices or when the indices
// were handed to |aTableUpdate|; otherwise the error from DoRiceDeltaDecode.
nsresult
ProtocolParserProtobuf::ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
                                              const ThreatEntrySet& aRemoval)
{
  if (aRemoval.has_rice_indices()) {
    nsTArray<uint32_t> indices;
    nsresult rv = DoRiceDeltaDecode(aRemoval.rice_indices(), indices);
    NS_ENSURE_SUCCESS(rv, rv);

    // The decoded values are indices to remove, passed along with their
    // count.
    aTableUpdate.NewRemovalIndices(&indices[0], indices.Length());
    return NS_OK;
  }

  PARSER_LOG(("* No rice encoded removal."));
  return NS_OK;
}
} // namespace safebrowsing
} // namespace mozilla

View File

@@ -184,6 +184,12 @@ private:
nsresult ProcessRawRemoval(TableUpdateV4& aTableUpdate,
const ThreatEntrySet& aRemoval);
// Decodes the Rice-encoded hashes carried by |aAddition| (if any) and
// registers them on |aTableUpdate| as 4-byte prefixes.
nsresult ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
const ThreatEntrySet& aAddition);
// Decodes the Rice-encoded indices carried by |aRemoval| (if any) and
// registers them on |aTableUpdate| as removal indices.
nsresult ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
const ThreatEntrySet& aRemoval);
};
} // namespace safebrowsing

View File

@@ -107,10 +107,8 @@ InitListUpdateRequest(ThreatType aThreatType,
aListUpdateRequest->set_platform_type(GetPlatformType());
aListUpdateRequest->set_threat_entry_type(URL);
// Only RAW data is supported for now.
// TODO: Bug 1285848 Supports Rice-Golomb encoding.
Constraints* contraints = new Constraints();
contraints->add_supported_compressions(RAW);
contraints->add_supported_compressions(RICE);
aListUpdateRequest->set_allocated_constraints(contraints);
// Only set non-empty state.