!
""#$$
"""
%
&'"(
"")""
*"+,
!
""#$$
"""
%
&'"(
"")""
*"+,
*-#
" $
!&.
""&&
$$($&"/+"#"
"*(0
""#!
&""#
#/&0
&'1
2
2
3
3
4
4
5
5
2
2
y
1
2
y
1
2
3
3
4
4
y
2
3
y
2
3
"""
)&"6&".
#"%"
$$(7
%"$$(7
6&8""&"-
.
#".
.
#"#"
w
1
w
1
w
2
w
2
+
+
s
s
c
c
!
""#$$
"""
%
&'"(
"")""
*"+,
Given.
&."#"#'
""#"
2
"
3
999"
'
(
(
Questions:
'
(&
$"'"#"#(
$!"(
&'"#$
:$84;<
&""$3<<=
Every #&&'1
(
>;"(
>2"(
>3$"&#(
:$8 ( !
Code size. 25<<)?25<<)
Project size. 3%@32+
Development time. 3"
Features
.#"/>3*#"2&0
."'"#
&
A!.B$"#"/
0
:$84;<C4
:$84;<
&""$3<<=
Every #&&'1(
>;"(
>2"(
>3$"&#(
4
&""3=
$3<<D
#"""&""(
>=<<<<<("&"(("#
,4
B&"
)"$('#"/!-!"E0
)"$'$&"/-"#0
&"
#&">F<<<<<<<<<
G$&"!""),&"#9
"#
& !/##&.=3*0
-&&
4&"&
A!
Code size. 2F<<)
Project size. 22+
Development time. 3"
Features
&# !/&+
H0
-"/""(
""#0
*""/$(0
4I
"(
Code size. 3;<<)
Project size. 2<+
Development time. 2"
Features
B$"#"/"($
0
&"#"),&'/9
$(0
"#
!
""#$$
"""
%
&'"(
"")""
*"+,
)"$
)-!
"#J!$K
!!!9 9"!!!9"9
9
JK/$(&"0
"""&
"9
"")"
)"
Start of competition. $&(3<<D
Start of training phase. (3<<D
End of training phase. L&"3<<D
Task.
$$('!(
!'""#
/### !0
Resources.
5/3830;5-$)I"
2;*
3<<*G
"#
Weeks of data in training.
7,000,000,000 impressions
2 weeks of CPU time during training.
2 weeks × 7 days × 86,400 s/day
1,209,600 seconds
Learning algorithm speed
requirement.
5,787"&@
172.8 μs "&
)".8"#
Excel 2007
"+A&"
&"
SQL Server 2005
29;*J K/3!'?
"0
-B&"&
Visual Studio 2005 & F#
=5%&"/"(0
"""#
"#("#&#"
B"
Code size. =<<)
Project size. 2+
Development time. 2 weeks
Features
)+"/&"'B0E
#-""" &
$&'-""!&'(#"
)"!,$&"
"#("#"B
G
/// Record describing one page-view.
/// Fields typed `option` may hold no value. The advertisements attached to the
/// page-view are carried in `ReturnedAds`.
type PageView =
    { ClientDateTime : DateTime
      GmtSeconds : int
      TargetDomainId : int16
      Medium : MediumType option
      StartPosition : int
      PageNum : byte
      // Declared string length: 256.
      [<SqlStringLengthAttribute(256)>] Query : string
      Gender : Gender option
      AgeBucket : AgeGroup option
      ReturnedAdCnt : byte
      AbTestingType : byte option
      AlgorithmId : int option
      ANID : int128 option
      GUID : int128 option
      // Declared string length: 15.
      [<SqlStringLengthAttribute(15)>] PassportZipCode : string option
      // Declared string length: 2.
      [<SqlStringLengthAttribute(2)>] PassportCountry : string option
      PassportRegion : int
      // NOTE(review): a string-length attribute on a `char` field - confirm this is intended.
      [<SqlStringLengthAttribute(2)>] PassportOccupation : char
      LocationCountry : int
      LocationState : int
      LocationMetroArea : int
      CategoryId : int16
      SubCategoryId : int16
      FormCode : int16
      ReturnedAds : Advertisement array }
/// Record describing one page-view.
/// Fields typed `option` may hold no value. The advertisements attached to the
/// page-view are carried in `ReturnedAds`.
type PageView =
    { ClientDateTime : DateTime
      GmtSeconds : int
      TargetDomainId : int16
      Medium : MediumType option
      StartPosition : int
      PageNum : byte
      // Declared string length: 256.
      [<SqlStringLengthAttribute(256)>] Query : string
      Gender : Gender option
      AgeBucket : AgeGroup option
      ReturnedAdCnt : byte
      AbTestingType : byte option
      AlgorithmId : int option
      ANID : int128 option
      GUID : int128 option
      // Declared string length: 15.
      [<SqlStringLengthAttribute(15)>] PassportZipCode : string option
      // Declared string length: 2.
      [<SqlStringLengthAttribute(2)>] PassportCountry : string option
      PassportRegion : int
      // NOTE(review): a string-length attribute on a `char` field - confirm this is intended.
      [<SqlStringLengthAttribute(2)>] PassportOccupation : char
      LocationCountry : int
      LocationState : int
      LocationMetroArea : int
      CategoryId : int16
      SubCategoryId : int16
      FormCode : int16
      ReturnedAds : Advertisement array }
/// The two kinds of medium: paid search and contextual search.
type MediumType = PaidSearch | ContextualSearch
/// Record describing one displayed advertisement.
/// Field order is kept exactly as declared originally.
type Advertisement =
    { AdId : int; OrderItemId : int
      CampDayId : int16; CampHourNum : byte
      ProductId : ProductType
      MatchType : MatchType
      AdLayoutId : AdLayout
      RelativePosition : byte
      DeliveryEngineRank : int16
      ActualBid : int
      ProbabilityOfClick : int16
      MatchScore : int
      // Integer counters and cost.
      ImpressionCnt : int; ClickCnt : int; ConversionCnt : int
      TotalCost : int }
/// The two kinds of medium: paid search and contextual search.
type MediumType = PaidSearch | ContextualSearch
/// Record describing one displayed advertisement.
/// Field order is kept exactly as declared originally.
type Advertisement =
    { AdId : int; OrderItemId : int
      CampDayId : int16; CampHourNum : byte
      ProductId : ProductType
      MatchType : MatchType
      AdLayoutId : AdLayout
      RelativePosition : byte
      DeliveryEngineRank : int16
      ActualBid : int
      ProbabilityOfClick : int16
      MatchScore : int
      // Integer counters and cost.
      ImpressionCnt : int; ClickCnt : int; ConversionCnt : int
      TotalCost : int }
/// Create the SQL schema
// NOTE(review): the three string arguments are passed straight to bulkBuild;
// their meaning (server / database / table?) is not visible here - confirm.
let schema = bulkBuild ("cpidssdm18", "Cambridge", "June10")

// Read the CSV file, split every line on ',', and chunk the split lines by
// their first column. Each chunk arrives as a (key, lines) pair and is turned
// into one PageView that is handed to the schema.
File.OpenTextReader "HourlyRelevanceFeed.csv"
|> Seq.map (fun line -> line.Split [|','|])
|> Seq.chunkBy (fun cols -> cols.[0])
|> Seq.iteri (fun i (_rguid, xss) ->
    // Write the current in-memory bulk to the SQL database on every 10000th chunk.
    // NOTE(review): the condition also holds for i = 0, so Flush is called once
    // before the first Insert.
    if i % 10000 = 0 then
        schema.Flush ()
    // Get the strongly typed object from the list of CSV file lines
    let pageView = PageView.Parse xss
    // Insert it
    pageView |> schema.Insert
)
// One final flush for whatever was inserted since the last periodic flush
schema.Flush ()
/// Create the SQL schema
// NOTE(review): the three string arguments are passed straight to bulkBuild;
// their meaning (server / database / table?) is not visible here - confirm.
let schema = bulkBuild ("cpidssdm18", "Cambridge", "June10")

// Read the CSV file, split every line on ',', and chunk the split lines by
// their first column. Each chunk arrives as a (key, lines) pair and is turned
// into one PageView that is handed to the schema.
File.OpenTextReader "HourlyRelevanceFeed.csv"
|> Seq.map (fun line -> line.Split [|','|])
|> Seq.chunkBy (fun cols -> cols.[0])
|> Seq.iteri (fun i (_rguid, xss) ->
    // Write the current in-memory bulk to the SQL database on every 10000th chunk.
    // NOTE(review): the condition also holds for i = 0, so Flush is called once
    // before the first Insert.
    if i % 10000 = 0 then
        schema.Flush ()
    // Get the strongly typed object from the list of CSV file lines
    let pageView = PageView.Parse xss
    // Insert it
    pageView |> schema.Insert
)
// One final flush for whatever was inserted since the last periodic flush
schema.Flush ()
!
""#$$
"""
%
&'"(
"")""
*"+,
!
""#$$
"""
%
&'"(
"")""
*"+,
*"+,
&"".
29 "#&#$ "
'E
39
29 J)K#
39 &"#
49 #("
49 "$(!9
59 O&"#E